Time Series Forecasting
Time Changes Everything
Most ML algorithms assume observations are independent. But what if tomorrow's stock price depends on today's? What if next month's sales follow a seasonal pattern?
Welcome to time series - where order matters.
The Restaurant Sales Problem
You manage a restaurant chain. You need to predict:
How many customers next week?
How much inventory to order?
How many staff to schedule?
You have 3 years of daily sales data. Let’s find the patterns!
Components of Time Series
Every time series can be decomposed into components:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
# Generate sample time series data
np.random.seed(42)
dates = pd.date_range('2020-01-01', periods=730, freq='D')
# Create components
trend = np.linspace(100, 200, 730)                        # Upward trend
seasonal = 30 * np.sin(2 * np.pi * np.arange(730) / 365)  # Yearly seasonality
noise = np.random.normal(0, 10, 730)                      # Random noise
sales = trend + seasonal + noise
ts = pd.Series(sales, index=dates)
# Decompose
decomposition = seasonal_decompose(ts, model='additive', period=365)
# Plot
fig, axes = plt.subplots(4, 1, figsize=(12, 10))
axes[0].plot(ts)
axes[0].set_title('Original Time Series')
axes[1].plot(decomposition.trend)
axes[1].set_title('Trend Component')
axes[2].plot(decomposition.seasonal)
axes[2].set_title('Seasonal Component')
axes[3].plot(decomposition.resid)
axes[3].set_title('Residual (Noise)')
plt.tight_layout()
plt.show()
The Four Components
Trend: long-term direction (e.g., a growing customer base)
Seasonality: regular, repeating patterns (e.g., summer ice cream sales)
Cyclical: irregular, longer-term patterns (e.g., economic cycles)
Noise: random variation (e.g., weather effects)
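The decomposition object from the code above exposes each component as a Series, so you can also quantify how much each one contributes instead of just eyeballing the plots. A minimal sketch (decomp_df is a name introduced here, not part of the example above):
decomp_df = pd.DataFrame({
    'trend': decomposition.trend,
    'seasonal': decomposition.seasonal,
    'residual': decomposition.resid
}).dropna()
# Variance of each component: a rough measure of its contribution
print(decomp_df.var())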
Stationarity: The Foundation
Most time series methods require stationarity - the series' statistical properties (mean, variance, autocorrelation) do not change over time.
Testing for Stationarity
from statsmodels.tsa.stattools import adfuller

def test_stationarity(series, name="Series"):
    """Test if a series is stationary using the Augmented Dickey-Fuller test."""
    result = adfuller(series.dropna())
    print(f"\n{name} - Stationarity Test:")
    print(f"  ADF Statistic: {result[0]:.4f}")
    print(f"  p-value: {result[1]:.4f}")
    print(f"  Stationary: {'Yes' if result[1] < 0.05 else 'No'}")
    return result[1] < 0.05

# Test our series
test_stationarity(ts, "Raw Sales")
# Make it stationary through differencing
ts_diff = ts.diff().dropna()
test_stationarity(ts_diff, "Differenced Sales")
Making Data Stationary
# Differencing: subtract the previous value
ts_diff = ts.diff()
# Log transform: stabilize variance
ts_log = np.log(ts)
# Both combined
ts_log_diff = np.log(ts).diff()
# Plot transformations
fig, axes = plt.subplots(2, 2, figsize=(12, 8))
axes[0, 0].plot(ts)
axes[0, 0].set_title('Original')
axes[0, 1].plot(ts_diff)
axes[0, 1].set_title('Differenced')
axes[1, 0].plot(ts_log)
axes[1, 0].set_title('Log Transformed')
axes[1, 1].plot(ts_log_diff)
axes[1, 1].set_title('Log + Differenced')
plt.tight_layout()
plt.show()
Simple Forecasting Methods
1. Moving Average
def moving_average_forecast(series, window=7):
    """Smooth a series using a simple moving average."""
    return series.rolling(window=window).mean()

# Calculate 7-day and 30-day moving averages
ma_7 = moving_average_forecast(ts, 7)
ma_30 = moving_average_forecast(ts, 30)
plt.figure(figsize=(12, 6))
plt.plot(ts.values[-100:], label='Actual', alpha=0.7)
plt.plot(ma_7.values[-100:], label='7-day MA', linewidth=2)
plt.plot(ma_30.values[-100:], label='30-day MA', linewidth=2)
plt.legend()
plt.title('Moving Average Smoothing')
plt.show()
2. Exponential Smoothing
from statsmodels.tsa.holtwinters import SimpleExpSmoothing, ExponentialSmoothing
# Simple Exponential Smoothing
ses = SimpleExpSmoothing(ts).fit(smoothing_level=0.3)
ses_forecast = ses.forecast(30)
# Holt-Winters (handles trend + seasonality)
hw = ExponentialSmoothing(
    ts,
    trend='add',
    seasonal='add',
    seasonal_periods=365
).fit()
hw_forecast = hw.forecast(30)
# Plot the last 100 days plus the 30-day forecasts
plt.figure(figsize=(12, 6))
plt.plot(ts.values[-100:], label='Historical')
plt.plot(range(100, 130), ses_forecast, label='Simple ES', linestyle='--')
plt.plot(range(100, 130), hw_forecast, label='Holt-Winters', linestyle='--')
plt.axvline(99, color='gray', linestyle=':')
plt.legend()
plt.title('Exponential Smoothing Forecasts')
plt.show()
ARIMA: The Classic Approach
ARIMA = AutoRegressive Integrated Moving Average
AR(p): regresses on the p most recent values
I(d): differences the series d times to reach stationarity
MA(q): models the q most recent forecast errors
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
# Plot ACF and PACF to help choose p and q
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
plot_acf(ts_diff.dropna(), ax=axes[0], lags=40)
plot_pacf(ts_diff.dropna(), ax=axes[1], lags=40)
plt.tight_layout()
plt.show()
# Fit ARIMA model
# Train on the first 700 days, test on the last 30
train = ts[:700]
test = ts[700:]
model = ARIMA(train, order=(1, 1, 1))
fitted = model.fit()
print(fitted.summary())
# Forecast
forecast = fitted.forecast(len(test))
# Plot
plt.figure(figsize=(12, 6))
plt.plot(train.values[-100:], label='Training')
plt.plot(range(100, 100 + len(test)), test.values, label='Actual')
plt.plot(range(100, 100 + len(forecast)), forecast.values, label='Forecast', linestyle='--')
plt.axvline(99, color='gray', linestyle=':')
plt.legend()
plt.title('ARIMA Forecast')
plt.show()
Auto ARIMA
# Using pmdarima for automatic order selection
# pip install pmdarima
from pmdarima import auto_arima
auto_model = auto_arima(
    train,
    start_p=0, start_q=0,
    max_p=5, max_q=5,
    d=None,            # Let it determine d
    seasonal=True,
    m=7,               # Weekly seasonality
    trace=True,
    error_action='ignore',
    suppress_warnings=True
)
print(auto_model.summary())
SARIMA: Adding Seasonality
SARIMA = Seasonal ARIMA with additional seasonal components:
from statsmodels.tsa.statespace.sarimax import SARIMAX
# SARIMA with weekly seasonality
model = SARIMAX(
    train,
    order=(1, 1, 1),             # (p, d, q)
    seasonal_order=(1, 1, 1, 7)  # (P, D, Q, m), m=7 for weekly
)
fitted = model.fit(disp=False)
forecast = fitted.forecast(len(test))
# Calculate RMSE
from sklearn.metrics import mean_squared_error
rmse = np.sqrt(mean_squared_error(test, forecast))
print(f"SARIMA RMSE: {rmse:.2f}")
Prophet: Modern Forecasting
Facebook’s Prophet handles trends, seasonality, and holidays automatically:
# pip install prophet
from prophet import Prophet
# Prepare data in Prophet format
df = pd.DataFrame({
    'ds': dates,
    'y': sales
})
# Train/test split
train_df = df[:700]
test_df = df[700:]
# Fit Prophet
prophet = Prophet(
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False
)
prophet.fit(train_df)
# Make future dataframe
future = prophet.make_future_dataframe(periods=30)
forecast = prophet.predict(future)
# Plot
prophet.plot(forecast)
plt.title('Prophet Forecast')
plt.show()
# Plot components
prophet.plot_components(forecast)
plt.show()
Adding Holidays
# Define holidays
holidays = pd.DataFrame({
    'holiday': 'christmas',
    'ds': pd.to_datetime(['2020-12-25', '2021-12-25', '2022-12-25']),
    'lower_window': -2,  # 2 days before
    'upper_window': 1    # 1 day after
})
prophet = Prophet(holidays=holidays)
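The holiday-aware model is fit and used exactly like the plain one above. A minimal sketch continuing from the line above and reusing train_df (holiday_forecast is a name introduced here):
prophet.fit(train_df)
future = prophet.make_future_dataframe(periods=30)
holiday_forecast = prophet.predict(future)
# The holiday effect shows up as an extra component in the components plot
prophet.plot_components(holiday_forecast)
plt.show()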
ML Approach: Feature Engineering for Time Series
Transform the time series into a supervised learning problem using lag, rolling, and date features:
def create_features(df, target_col, n_lags=7):
    """Create features from a time series for ML models."""
    df = df.copy()
    # Lag features
    for i in range(1, n_lags + 1):
        df[f'lag_{i}'] = df[target_col].shift(i)
    # Rolling statistics, shifted by one step so they use only past values
    # (an unshifted rolling mean would include today's target and leak it)
    df['rolling_mean_7'] = df[target_col].shift(1).rolling(7).mean()
    df['rolling_std_7'] = df[target_col].shift(1).rolling(7).std()
    df['rolling_mean_30'] = df[target_col].shift(1).rolling(30).mean()
    # Date features
    if isinstance(df.index, pd.DatetimeIndex):
        df['day_of_week'] = df.index.dayofweek
        df['month'] = df.index.month
        df['day_of_month'] = df.index.day
        df['is_weekend'] = (df.index.dayofweek >= 5).astype(int)
    return df.dropna()

# Prepare dataset
ts_df = pd.DataFrame({'sales': sales}, index=dates)
ts_ml = create_features(ts_df, 'sales', n_lags=7)
# Features and target
feature_cols = [col for col in ts_ml.columns if col != 'sales']
X = ts_ml[feature_cols]
y = ts_ml['sales']
# Time series split (don't shuffle!)
split_idx = int(len(X) * 0.8)
X_train, X_test = X[:split_idx], X[split_idx:]
y_train, y_test = y[:split_idx], y[split_idx:]
# Train models
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
models = {
    'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
    'Gradient Boosting': GradientBoostingRegressor(n_estimators=100, random_state=42)
}
for name, model in models.items():
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    mae = mean_absolute_error(y_test, predictions)
    rmse = np.sqrt(mean_squared_error(y_test, predictions))
    print(f"{name}: MAE={mae:.2f}, RMSE={rmse:.2f}")
Cross-Validation for Time Series
Never use random cross-validation for time series! Future data would leak into training.
from sklearn.model_selection import TimeSeriesSplit
# Time series cross-validation
tscv = TimeSeriesSplit(n_splits=5)
# Visualize the splits
plt.figure(figsize=(12, 4))
for i, (train_idx, test_idx) in enumerate(tscv.split(X)):
    plt.plot(train_idx, [i] * len(train_idx), 'b-', linewidth=3, label='Train' if i == 0 else '')
    plt.plot(test_idx, [i] * len(test_idx), 'r-', linewidth=3, label='Test' if i == 0 else '')
plt.xlabel('Sample index')
plt.ylabel('CV iteration')
plt.legend()
plt.title('Time Series Cross-Validation')
plt.show()
# Use in model evaluation
from sklearn.model_selection import cross_val_score
rf = RandomForestRegressor(n_estimators=100, random_state=42)
scores = cross_val_score(rf, X, y, cv=tscv, scoring='neg_mean_squared_error')
rmse_scores = np.sqrt(-scores)
print(f"CV RMSE: {rmse_scores.mean():.2f} ± {rmse_scores.std():.2f}")
Multi-Step Forecasting
Strategy 1: Recursive
Predict one step ahead, then feed that prediction back in as input for the next step:
def recursive_forecast(model, X_last, n_steps, n_lags):
    """Recursively forecast multiple steps.

    Assumes the first n_lags entries of X_last are lag_1 ... lag_n (as built by
    create_features above); the remaining features are held fixed for simplicity.
    """
    forecasts = []
    current_features = X_last.copy()
    for _ in range(n_steps):
        pred = model.predict(current_features.reshape(1, -1))[0]
        forecasts.append(pred)
        # Shift lag features: lag_i takes the value previously held by lag_{i-1}
        for i in range(n_lags - 1, 0, -1):
            current_features[i] = current_features[i - 1]
        current_features[0] = pred  # lag_1 becomes the latest prediction
    return forecasts
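A quick way to exercise this, reusing the fitted Random Forest and X_test from the ML section above; it works because feature_cols puts lag_1 through lag_7 first (rf_fitted, last_row, and next_week are names introduced here):
rf_fitted = models['Random Forest']      # already fitted above
last_row = X_test.iloc[-1].to_numpy()    # most recent feature vector
next_week = recursive_forecast(rf_fitted, last_row, n_steps=7, n_lags=7)
print([round(v, 1) for v in next_week])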
Strategy 2: Direct
Train separate models for each horizon:
def direct_forecast(df, target_col, horizons=(1, 7, 14)):
    """Train a separate model for each forecast horizon."""
    models = {}
    for h in horizons:
        # Create a target h steps ahead
        df_h = df.copy()
        df_h['target'] = df_h[target_col].shift(-h)
        df_h = df_h.dropna()
        feature_cols = [col for col in df_h.columns if col not in [target_col, 'target']]
        X = df_h[feature_cols]
        y = df_h['target']
        # Train the model, holding out the last h rows for evaluation
        model = RandomForestRegressor(n_estimators=100, random_state=42)
        model.fit(X[:-h], y[:-h])
        models[h] = model
    return models
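A sketch of how the returned dictionary might be used, reusing ts_ml from the feature-engineering section (direct_models and latest are names introduced here):
direct_models = direct_forecast(ts_ml, 'sales', horizons=(1, 7, 14))
latest = ts_ml.drop(columns=['sales']).iloc[[-1]]  # features for the most recent day
for h, m in direct_models.items():
    print(f"{h}-day-ahead forecast: {m.predict(latest)[0]:.1f}")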
Evaluation Metrics for Time Series
def evaluate_forecast(actual, predicted, name="Model"):
    """Calculate common time series metrics."""
    mae = mean_absolute_error(actual, predicted)
    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    mape = np.mean(np.abs((actual - predicted) / actual)) * 100
    print(f"\n{name} Metrics:")
    print(f"  MAE:  {mae:.2f}")
    print(f"  RMSE: {rmse:.2f}")
    print(f"  MAPE: {mape:.1f}%")
    return {'MAE': mae, 'RMSE': rmse, 'MAPE': mape}

# Compare models. Note that `forecast` was reassigned by the Prophet example above,
# so recompute the SARIMA forecast explicitly (`fitted` still holds the SARIMAX fit).
sarima_forecast = fitted.forecast(len(test))
evaluate_forecast(test, sarima_forecast, "SARIMA")
Common Pitfalls
1. Data Leakage
# WRONG: Using future data for scaling
from sklearn.preprocessing import StandardScaler
# Don't do this!
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)  # Uses all data, including the test period!
# CORRECT: Fit only on training data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)  # Only transform!
2. Ignoring Seasonality
Always check for multiple seasonality types (a quick autocorrelation check is sketched after this list):
Daily (24 hours)
Weekly (7 days)
Monthly (~30 days)
Yearly (365 days)
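One simple check, a minimal sketch assuming the ts series from earlier: look at the autocorrelation of the differenced series at each candidate seasonal lag; a pronounced spike suggests that seasonality is present.
from statsmodels.tsa.stattools import acf
# Autocorrelation at candidate seasonal lags (on the differenced series, to remove the trend)
candidate_lags = {'weekly': 7, 'monthly': 30, 'yearly': 365}
autocorr = acf(ts.diff().dropna(), nlags=365)
for name, lag in candidate_lags.items():
    print(f"{name} (lag {lag}): autocorrelation = {autocorr[lag]:.2f}")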
3. Overfitting to Recent Trends
Test on multiple time periods, not just the most recent one; a sketch of how to do that follows.
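A minimal sketch, reusing X, y, TimeSeriesSplit, and the metric imports from the ML sections above; printing each fold's error makes any drift over time visible:
fold_rmses = []
for fold, (train_idx, test_idx) in enumerate(TimeSeriesSplit(n_splits=5).split(X), start=1):
    m = RandomForestRegressor(n_estimators=100, random_state=42)
    m.fit(X.iloc[train_idx], y.iloc[train_idx])
    preds = m.predict(X.iloc[test_idx])
    fold_rmse = np.sqrt(mean_squared_error(y.iloc[test_idx], preds))
    fold_rmses.append(fold_rmse)
    print(f"Fold {fold}: RMSE = {fold_rmse:.2f}")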
Key Takeaways
Order matters: time series data is sequential - never shuffle!
Check stationarity: most classical methods require stationary data.
Multiple components: decompose into trend, seasonality, and noise.
Proper validation: use TimeSeriesSplit, not random CV.
What’s Next?
Let's explore the theory behind model performance with the bias-variance tradeoff!
Continue to Bias-Variance Tradeoff: understanding the fundamental tradeoff in machine learning.