Source code for eqc_models.ml.forecast
- import sys
- import numpy as np
- import pandas as pd
- from sklearn.linear_model import LinearRegression, Ridge
- from .reservoir import QciReservoir
- from .forecastbase import BaseForecastModel
- [docs]
class ReservoirForecastModel(BaseForecastModel, QciReservoir):
    """
    A reservoir based forecast model.

    Parameters
    ----------

    ip_addr: The IP address of the device.

    num_nodes: Number of reservoir network nodes.

    feature_scaling: The factor used to scale the reservoir output.

    num_pads: Size of the pad used in the reservoir input;
    default: 0.

    reg_coef: L2 regularization coefficient for linear regression;
    default: 0.

    device: The QCi reservoir device. Currently only 'EmuCore' is
    supported; default: EmuCore.

    Examples
    ---------

    >>> MAX_TRAIN_DAY = 800
    >>> IP_ADDR = "172.22.19.49"
    >>> FEATURE_SCALING = 0.1
    >>> NUM_NODES = 1000
    >>> NUM_PADS = 100
    >>> LAGS = 2
    >>> from contextlib import redirect_stdout
    >>> import io
    >>> f = io.StringIO()
    >>> from eqc_models.ml import ReservoirForecastModel
    >>> with redirect_stdout(f):
    ...    model = ReservoirForecastModel(
    ...        ip_addr=IP_ADDR,
    ...        num_nodes=NUM_NODES,
    ...        feature_scaling=FEATURE_SCALING,
    ...        num_pads=NUM_PADS,
    ...        device="EmuCore",
    ...    )
    ...    model.fit(
    ...        data=train_df,
    ...        feature_fields=["norm_cell_prod"],
    ...        target_fields=["norm_cell_prod"],
    ...        lags=LAGS,
    ...        horizon_size=1,
    ...    )
    ...    y_train_pred = model.predict(train_df, mode="in_sample")
    ...    y_test_pred = model.predict(test_df, mode="in_sample")
    >>> model.close()

    """

    def __init__(
        self,
        ip_addr,
        num_nodes,
        feature_scaling,
        num_pads: int = 0,
        reg_coef: float = 0.0,
        device: str = "EmuCore",
    ):
        # Reject an unsupported device before the base classes are
        # initialized.
        assert device == "EmuCore", "Unknown device!"

        # The two bases take different constructor arguments, so each
        # one is initialized explicitly rather than through super().
        BaseForecastModel.__init__(self)
        QciReservoir.__init__(self, ip_addr, num_nodes)

        # Configuration supplied by the caller.
        self.ip_addr = ip_addr
        self.num_nodes = num_nodes
        self.feature_scaling = feature_scaling
        self.num_pads = num_pads
        self.reg_coef = reg_coef
        self.device = device

        # State that is filled in later (by fit for everything below
        # except lock_id, which this class only resets here).
        self.lock_id = None
        self.lin_model = None
        self.feature_fields = None
        self.target_fields = None
        self.lags = None
        self.horizon_size = None
        self.zero_pad_data = None
        self.train_pad_data = None

        self.init_reservoir()

    def close(self):
        """Release the lock held by this model."""
        self.release_lock()

    def fit(
        self,
        data: pd.DataFrame,
        feature_fields: list,
        target_fields: list,
        lags: int = 0,
        horizon_size: int = 1,
    ) -> None:
        """A function to train a forecast model.

        The training statistics are printed to standard output.

        Parameters
        ----------

        data: A pandas data frame that contain the time series.

        feature_fields: A list of fields in the data frame that are as
        inputs to the reservoir.

        target_fields: A list of fields in the data frame that are to be
        forecasted.

        lags: Number of lags used; default = 0.

        horizon_size: Size of the horizon, e.g. number of forecast
        steps.
        """

        num_pads = self.num_pads
        use_pads = num_pads is not None and num_pads > 0

        if use_pads:
            # Prepend num_pads all-zero rows (one zero column per input
            # column); the matching rows are dropped again after the
            # reservoir has been pushed.
            self.zero_pad_data = pd.DataFrame(
                {col: np.zeros(shape=(num_pads)) for col in data.columns}
            )
            data = pd.concat([self.zero_pad_data, data])

        fea_data = np.array(data[feature_fields])
        targ_data = np.array(data[target_fields])
        X_train, y_train, _ = self.prep_fea_targs(
            fea_data=fea_data,
            targ_data=targ_data,
            window_size=lags + 1,
            horizon_size=horizon_size,
        )

        # Remember the setup so that predict() prepares its inputs the
        # same way.
        self.feature_fields = feature_fields
        self.target_fields = target_fields
        self.lags = lags
        self.horizon_size = horizon_size

        X_train_resp = self.push_reservoir(X_train)
        if use_pads:
            # Discard the leading rows that correspond to the padding.
            X_train_resp = X_train_resp[num_pads:]
            y_train = y_train[num_pads:]

        self.lin_model = Ridge(alpha=self.reg_coef, fit_intercept=True)
        self.lin_model.fit(X_train_resp, y_train)

        y_train_pred = self.lin_model.predict(X_train_resp)

        train_stats = self.get_stats(y_train, y_train_pred)
        print("Training stats:", train_stats)

        if use_pads:
            # Keep the last num_pads rows of the (padded) training data
            # for pad_mode="last_train" in predict().
            self.train_pad_data = data.tail(num_pads)
        return

    def predict(
        self,
        data: pd.DataFrame,
        pad_mode: str = "zero",
        mode: str = "in_sample",
    ):
        """A function to get predictions from forecast model.

        In 'in_sample' mode the prediction statistics are printed to
        standard output.

        Parameters
        ----------

        data: A pandas data frame that contain the time series.

        pad_mode: Mode of the reservoir input padding, either
        'last_train' or 'zero'; default: 'zero'. Any value other than
        'last_train' selects zero padding. Only used when the model
        was created with num_pads > 0.

        mode: A value of 'out_of_sample' predicts the horizon
        following the time series. A value of 'in_sample' predicts in
        sample (used for testing); default: in_sample.

        Returns
        -------

        The predictions: numpy.array((horizon_size, num_dims)).
        """
        assert self.lin_model is not None, "Model not train yet!"
        assert mode in ["in_sample", "out_of_sample"], (
            "Unknown mode <%s>!" % mode
        )

        num_pads = self.num_pads
        use_pads = num_pads is not None and num_pads > 0

        if use_pads:
            if pad_mode == "last_train":
                pad_data = self.train_pad_data
            else:
                pad_data = self.zero_pad_data

            data = pd.concat([pad_data, data])

        fea_data = np.array(data[self.feature_fields])

        # Targets only exist for in-sample prediction.
        y = None
        if mode == "in_sample":
            targ_data = np.array(data[self.target_fields])
            X, y, _ = self.prep_fea_targs(
                fea_data=fea_data,
                targ_data=targ_data,
                window_size=self.lags + 1,
                horizon_size=self.horizon_size,
            )
        elif mode == "out_of_sample":
            X = self.prep_out_of_sample(
                fea_data=fea_data,
                window_size=self.lags + 1,
                horizon_size=self.horizon_size,
            )
        else:
            # Only reachable when assertions are disabled (python -O).
            raise ValueError("Unknown mode <%s>!" % mode)

        X_resp = self.push_reservoir(X)
        if use_pads:
            # Discard the leading rows that correspond to the padding.
            X_resp = X_resp[num_pads:]
            if y is not None:
                y = y[num_pads:]

        y_pred = self.lin_model.predict(X_resp)

        if mode == "in_sample":
            stats = self.get_stats(y, y_pred)
            print("In-sample prediction stats:", stats)

        return y_pred