Multiple linear regression on company financials to predict stock prices

July 27, 2020, 2:06 a.m. by Mike Johnson Jr

In this study, I use fundamental data to attempt to predict stock prices. The endeavor was unsuccessful, but I invite anyone to see if they can pick features that increase the accuracy of the model.

Data Sources:

iex_fundamentals.csv: /static/assets/iex_fundamentals.csv

market_cap_july_2020: /static/assets/market_cap_july_2020.csv


In [71]:
import json
import os
import pandas as pd
from sklearn.linear_model import LinearRegression
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
from pandas_datareader import data as sdata
import quandl
import os
In [72]:
# get fundamental data

fundamentals = pd.read_csv('iex_fundamentals.csv')
fundamentals = fundamentals.loc[fundamentals['currentCash'] != 0]
fundamentals = fundamentals.fillna(0)

# get market caps

market_caps = pd.read_csv('market_cap_july_2020.csv')
In [73]:
market_caps = market_caps[market_caps['market_cap'] != 0]
market_caps.head()
Out[73]:

tickermarket_cap
0AAL5792509790
1AAP10176533724
2AAPL1605699596400
3ABBV171140837400
4ABC21105095280
In [74]:
print(fundamentals.columns)

Index(['ticker', ' reportDate', 'fiscalDate', 'currency', 'currentCash',
       'shortTermInvestments', 'receivables', 'inventory',
       'otherCurrentAssets', 'currentAssets', 'longTermInvestments',
       'propertyPlantEquipment', 'goodwill', 'intangibleAssets', 'otherAssets',
       'totalAssets', 'accountsPayable', 'currentLongTermDebt',
       'otherCurrentLiabilities', 'totalCurrentLiabilities', 'longTermDebt',
       'otherLiabilities', 'minorityInterest', 'totalLiabilities',
       'commonStock', 'retainedEarnings', 'treasuryStock', 'capitalSurplus',
       'shareholderEquity', 'netTangibleAssets', 'totalRevenue',
       'costOfRevenue', 'grossProfit', 'researchAndDevelopment',
       'sellingGeneralAndAdmin', 'operatingExpense', 'operatingIncome',
       'otherIncomeExpenseNet', 'ebit', 'interestIncome', 'pretaxIncome',
       'incomeTax', 'netIncome', 'netIncomeBasic', 'depreciation',
       'changesInReceivables', 'changesInInventories', 'cashChange',
       'cashFlow', 'capitalExpenditures', 'investments',
       'investingActivityOther', 'totalInvestingCashFlows', 'dividendsPaid',
       'netBorrowings', 'otherFinancingCashFlows', 'cashFlowFinancing',
       'exchangeRateEffect'],
      dtype='object')
In [98]:
feature_list = ['totalAssets']

train_X = fundamentals[:100] #[x.tolist() for x in f2016[:100].values]
test_X = fundamentals[101:300]

train_y = market_caps[:100] 
test_y = market_caps[101:300]

train_y = train_y.drop(columns=['ticker'])
train_X = train_X.filter(feature_list)
In [99]:
reg = LinearRegression().fit(train_X, train_y)
In [100]:
test_X = test_X.filter(feature_list)
test_y = test_y.drop(columns=['ticker'])
pred = reg.predict(test_X)
In [101]:
test_X.head()
Out[101]:

totalAssets
1017669885000
10264056000000
10317429000000
1044331227000
105230639000000
In [102]:
print(r2_score(test_y, pred))

-0.5201881572827083
In [ ]:
 

Powered by Froala Editor