Source code for pyreal.transformers.pad
import numpy as np
import pandas as pd
from pyreal.transformers import TransformerBase
[docs]class TimeSeriesPadder(TransformerBase):
"""
A transformer that pads and truncates variable-length time series to equal lengths
"""
[docs] def __init__(self, value, length=None, **kwargs):
"""
Initializes the transformer
Args:
length (int):
Length to pad/truncate time series sequences to. If none, pad to maximum length in
fitting dataset
value (Union[int, float, complex, ndarray, Iterable]):
Object value to pad with
"""
if length is not None and length <= 0:
raise ValueError("Length must be integer >= 0")
self.length = length
self.value = value
super().__init__(**kwargs)
def fit(self, x, **params):
"""
Determines the length to pad to if not set
Args:
x (DataFrame or numpy array of shape (n_instances, n_features)):
The dataset to fit on
Returns:
None
"""
if self.length is None:
if isinstance(x, pd.DataFrame):
self.length = x.shape[1]
else:
self.length = len(max(x, key=lambda x_: len(x_)))
return super().fit(x)
def data_transform(self, x):
"""
Reorders and selects the features in x. If no length has been set
and the transformer has not been fit, pad to the longest subsequence length
Args:
x (numpy array of shape (n_instances, n_features)):
The data to transform
Returns:
numpy array of shape (n_instances, len(columns)):
The data with features selected and reordered
"""
if self.length is None:
length = len(max(x, key=lambda x_: len(x_)))
else:
length = self.length
z = np.full([len(x), length], self.value)
for i, j in enumerate(x):
if len(j) < z.shape[1]:
z[i][0 : len(j)] = j
else:
z[i][0:length] = j[0:length]
return z