SVR Regression for InterpolationΒΆ

In this example, the target turbine in the windpark Tehachapi lacks of wind power and wind speed data. The distribution of the missing data is Not Missing At Random (NMAR). The missing data is interpolated by SVR regression.

# Author: Jendrik Poloczek <>
# License: BSD 3 clause

import matplotlib.pyplot as plt

from windml.datasets.nrel import NREL
from windml.preprocessing.preprocessing import destroy
from windml.preprocessing.preprocessing import interpolate
from windml.visualization.plot_timeseries import plot_timeseries
from sklearn.svm import SVR
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import KFold

import matplotlib.pyplot as plt
import matplotlib.dates as md
from pylab import *

from numpy import array, zeros, float32, int32

# get windpark and corresponding target. forecast is for the target turbine
park_id = NREL.park_id['tehachapi']
windpark = NREL().get_windpark(park_id, 5, 2004)
target = windpark.get_target()

measurements = target.get_measurements()[300:1000]
damaged, indices = destroy(measurements, method='nmar', percentage=.80,\
        min_length=10, max_length=100)

neighbors = windpark.get_turbines()[:-1]
nseries = [t.get_measurements()[300:1000] for t in neighbors]

gamma_range = [0.0001, 0.000001]
C_range = [2 ** i for i in range(-3, 5, 1)]
regargs = {
    "epsilon" : 0.1,
    "cv_method" : "kfold",
    "cv_args" : {"k_folds" : 10},
    "kernel" : 'rbf',
    "tuned_parameters" : [{
        'kernel': ['rbf'],
        'C': C_range,
        'gamma': gamma_range}]}

tinterpolated = interpolate(damaged, method = 'mreg',\
                            neighbor_series = nseries,\
                            reg = 'svr',
                            regargs = regargs)

d = array([m[0] for m in tinterpolated])
y1 = array([m[1] for m in tinterpolated]) #score
y2 = array([m[2] for m in tinterpolated]) #speed

d_hat = array([m[0] for m in damaged])
y1_hat = array([m[1] for m in damaged])
y2_hat = array([m[2] for m in damaged])

d_true = array([m[0] for m in measurements])
y1_true = array([m[1] for m in measurements])
y2_true = array([m[2] for m in measurements])

d_time = []
for i in range (len(d)):
    d_act = datetime.datetime.fromtimestamp(d[i])

d_time_hat = []
for i in range (len(d_hat)):
    d_act_hat = datetime.datetime.fromtimestamp(d_hat[i])

d_time_true = []
for i in range (len(d_true)):
    d_act_true = datetime.datetime.fromtimestamp(d_true[i])

    figure = plt.figure(figsize=(8, 5))
    plt.xticks(rotation = 75)
    xfmt = md.DateFormatter('%Y/%m/%d %H-h')
    plt.ylim(-2, 32)
    plt.ylabel("Corrected Power (MW), Wind Speed (m/s)")
    plt.plot(d_time, y1, label = 'Power Production (interpolated)', color="b")
    plt.plot(d_time, y2, label = 'Wind Speed (interpolated)', color="g")
    plt.plot(d_time_true, y1_true, label = 'Power Production', color="b", linestyle="--")
    plt.plot(d_time_true, y2_true, label = 'Wind Speed', color="g", linestyle="--")
    plt.plot(d_time_hat, y1_hat, label = 'Power Production (damaged)',
        color="b", linestyle=".", marker="o")
    plt.plot(d_time_hat, y2_hat, label = 'Wind Speed (damaged)', color="g",
        marker="o", linestyle=".")
    plt.legend(loc='lower right')
    plt.title("Timeseries of the Selected Turbine")


Fork me on GitHub