
Python: using a Kalman filter to improve a simulation, but getting worse results

I have a question about a problem that comes up when applying a Kalman filter (KF) to the forecasting problem below. I have included a simple code sample.

Goal: I would like to know whether a KF is suitable for improving the day-ahead (t+24 hours) forecast/simulation result using the measurements obtained now. The assumption is that the measurements are perfect (i.e. if we can get the forecast to match the measurements perfectly, we are happy): the goal is to get the forecast as close to the measurements as possible.

We have one measured variable (z, the actual wind speed) and one simulated variable (x, the predicted wind speed).

The simulated wind speed x is produced by NWP (numerical weather prediction) software from various meteorological data (a black box to us). The simulation file is generated daily and contains data at half-hour intervals.

I am trying to correct the t+24 hour forecast with a scalar Kalman filter, using the measurements obtained now and the forecast data generated t-24 hours ago. For reference, I used: http://www.swarthmore.edu/NatSci/echeeve1/Ref/Kalman/ScalarKalman.html

Code:

#! /usr/bin/python 

import numpy as np 
import pylab 

import os 


def main(): 

    # x = 336 data points of simulated wind speed for 7 days * 24 hours * 2 (every half an hour)
    # Imagine at time t, we will get an x_t value for t+48, i.e. 24 hours later.
    x = load_x() 

    # this is a list that will contain 336 data points of our corrected data 
    x_sample_predict_list = [] 

    # z = 336 data points for 7 days * 24 hour * 2 of actual measured wind speed (every half an hour) 
    z = load_z() 

    # Here is the setup of the scalar kalman filter 
    # reference: http://www.swarthmore.edu/NatSci/echeeve1/Ref/Kalman/ScalarKalman.html 
    # state transition matrix (we simply have a scalar) 
    # what you need to multiply the last time's state to get the newest state 
    # we get the x_t+1 = A * x_t, since we get the x_t+1 directly for simulation 
    # we will have a = 1 
    a = 1.0 

    # observation matrix 
    # what you need to multiply to the state, convert it to the same form as incoming measurement 
    # both state and measurements are wind speed, so set h = 1 
    h = 1.0 

    Q = 16.0 # expected process variance of predicted Wind Speed 
    R = 9.0 # expected measurement variance of Wind Speed 

    p_j = Q # process covariance is equal to the initial process covariance estimate 

    # Kalman gain is equal to k = hp-_j/(hp-_j + R). With perfect measurement 
    # R = 0, k reduces to k=1/h which is 1 
    k = 1.0 

    # one week data 
    # original R2 = 0.183 
    # with delay = 6, R2 = 0.295 
    # with delay = 12, R2 = 0.147 
    # with delay = 48, R2 = 0.075 
    delay = 6 

    # Kalman loop 
    for t, x_sample in enumerate(x): 

     if t <= delay:   
      # for the first day of the forecast, 
      # we don't have forecast data and measurement 
      # from a day before to do correction 
      x_sample_predict = x_sample    
     else: # t > delay
      # for a priori estimate we take x_sample as is 
      # x_sample = x^-_j = a x^-_j_1 + b u_j 
      # Inside the NWP (numerical weather prediction),
      # the x_sample should be based on x_sample_j-1 (assumption)

      x_sample_predict_prior = a * x_sample 

      # we use the measurement from t-delay (ie. could be a day ago) 
      # and forecast data from t-delay, to produce a leading residual that can be used to 
      # correct the forecast. 
      residual = z[t-delay] - h * x_sample_predict_list[t-delay] 


      p_j_prior = a**2 * p_j + Q 

      k = h * p_j_prior/(h**2 * p_j_prior + R) 

      # we update our prediction based on the residual 
      x_sample_predict = x_sample_predict_prior + k * residual 

      p_j = p_j_prior * (1 - h * k) 

      #print k 
      #print p_j_prior 
      #print p_j 
      #raw_input() 

     x_sample_predict_list.append(x_sample_predict) 

    # initial goodness of fit 
    R2_val_initial = calculate_regression(x,z) 
    R2_string_initial = "R2 initial: {0:10.3f}, ".format(R2_val_initial)  
    print R2_string_initial  # R2_val_initial = 0.183 

    # final goodness of fit 
    R2_val_final = calculate_regression(x_sample_predict_list,z) 
    R2_string_final = "R2 final: {0:10.3f}, ".format(R2_val_final) 
    print R2_string_final  # R2_val_final = 0.117, which is worse 


    timesteps = xrange(len(x))  
    pylab.plot(timesteps,x,'r-', timesteps,z,'b:', timesteps,x_sample_predict_list,'g--') 
    pylab.xlabel('Time') 
    pylab.ylabel('Wind Speed') 
    pylab.title('Simulated Wind Speed vs Actual Wind Speed') 
    pylab.legend(('predicted','measured','kalman')) 
    pylab.show() 


def calculate_regression(x, y):   
    R2 = 0 
    A = np.array([x, np.ones(len(x))]) 
    model, resid = np.linalg.lstsq(A.T, y)[:2] 
    R2_val = 1 - resid[0]/(y.size * y.var())   
    return R2_val 

def load_x(): 
    return np.array([2, 3, 3, 5, 4, 4, 4, 5, 5, 6, 5, 7, 7, 7, 8, 8, 8, 9, 9, 10, 10, 10, 11, 11, 
    11, 10, 8, 8, 8, 8, 6, 3, 4, 5, 5, 5, 6, 5, 5, 5, 6, 5, 5, 6, 6, 7, 6, 8, 9, 10, 
    12, 11, 10, 10, 10, 11, 11, 10, 8, 8, 9, 8, 9, 9, 9, 9, 8, 9, 8, 11, 11, 11, 12, 
    12, 13, 13, 13, 13, 13, 13, 13, 14, 13, 13, 12, 13, 13, 12, 12, 13, 13, 12, 12, 
    11, 12, 12, 19, 18, 17, 15, 13, 14, 14, 14, 13, 12, 12, 12, 12, 11, 10, 10, 10, 
    10, 9, 9, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 7, 7, 8, 8, 8, 6, 5, 5, 
    5, 5, 5, 5, 6, 4, 4, 4, 6, 7, 8, 7, 7, 9, 10, 10, 9, 9, 8, 7, 5, 5, 5, 5, 5, 5, 
    5, 5, 6, 5, 5, 5, 4, 4, 6, 6, 7, 7, 7, 7, 6, 6, 5, 5, 4, 2, 2, 2, 1, 1, 1, 2, 3, 
    13, 13, 12, 11, 10, 9, 10, 10, 8, 9, 8, 7, 5, 3, 2, 2, 2, 3, 3, 4, 4, 5, 6, 6, 
    7, 7, 7, 6, 6, 6, 7, 6, 6, 5, 4, 4, 3, 3, 3, 2, 2, 1, 5, 5, 3, 2, 1, 2, 6, 7, 
    7, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 8, 8, 8, 8, 7, 7, 
    7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 11, 11, 11, 11, 10, 10, 9, 10, 10, 10, 2, 2, 
    2, 3, 1, 1, 3, 4, 5, 8, 9, 9, 9, 9, 8, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 
    7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 7, 5, 5, 5, 5, 5, 6, 5]) 

def load_z(): 
    return np.array([3, 2, 1, 1, 1, 1, 3, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 2, 1, 1, 2, 2, 2, 
    2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 4, 4, 4, 5, 4, 4, 5, 5, 5, 6, 6, 
    6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 9, 10, 9, 9, 10, 10, 9, 
    9, 10, 9, 9, 10, 9, 8, 9, 9, 7, 7, 6, 7, 6, 6, 7, 7, 8, 8, 8, 8, 8, 8, 7, 6, 7, 
    8, 8, 7, 8, 9, 9, 9, 9, 10, 9, 9, 9, 8, 8, 10, 9, 10, 10, 9, 9, 9, 10, 9, 8, 7, 
    7, 7, 7, 8, 7, 6, 5, 4, 3, 5, 3, 5, 4, 4, 4, 2, 4, 3, 2, 1, 1, 2, 1, 2, 1, 4, 4, 
    4, 4, 4, 3, 3, 3, 1, 1, 1, 1, 2, 3, 3, 2, 3, 3, 3, 2, 2, 5, 4, 2, 5, 4, 1, 1, 1, 
    1, 1, 1, 1, 2, 2, 1, 1, 3, 3, 3, 3, 3, 4, 3, 4, 3, 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 
    4, 4, 5, 5, 5, 4, 3, 3, 3, 3, 3, 3, 3, 3, 1, 2, 2, 3, 3, 1, 2, 1, 1, 2, 4, 3, 1, 
    1, 2, 0, 0, 0, 2, 1, 0, 0, 2, 3, 2, 4, 4, 3, 3, 4, 5, 5, 5, 4, 5, 4, 4, 4, 5, 5, 
    4, 3, 3, 4, 4, 4, 3, 3, 3, 4, 4, 4, 5, 5, 5, 4, 5, 5, 5, 5, 6, 5, 5, 8, 9, 8, 9, 
    9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 9, 10, 9, 8, 8, 9, 8, 9, 9, 10, 9, 9, 9, 
    7, 7, 9, 8, 7, 6, 6, 5, 5, 5, 5, 3, 3, 3, 4, 6, 5, 5, 6, 5]) 

if __name__ == '__main__': main() # this avoids executing main on import your_module 

-------------------------

Observations:

1) If yesterday's forecast over-predicted (a positive bias), then today I would correct by subtracting that bias away. In practice, if today's forecast happens to under-predict, subtracting a positive bias makes it even worse, and overall I actually observe wide swings and worse agreement with the measurements. What is wrong with my example?
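To make this concrete, here is a toy illustration with made-up numbers (not my actual data) of the effect I think I am seeing: when yesterday's error and today's error are not correlated, subtracting yesterday's bias can grow today's error instead of shrinking it.

forecast_yesterday, measured_yesterday = 8.0, 6.0   # over-prediction, bias = +2 m/s
forecast_today, measured_today = 5.0, 6.0           # today we happen to under-predict

bias = forecast_yesterday - measured_yesterday      # +2 m/s
corrected_today = forecast_today - bias             # 5 - 2 = 3 m/s

print(abs(forecast_today - measured_today))         # original error: 1 m/s
print(abs(corrected_today - measured_today))        # "corrected" error: 3 m/s (worse)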

2) Most KF resources state that the KF minimizes the posterior covariance p_j = E{(x_j - x^_j)^2}, and show a proof that K is chosen to minimize p_j. But can someone explain how minimizing the posterior covariance actually minimizes the effect of the process white noise w? In a real-time case, let's say the actual wind speed and the measured wind speed are 5 m/s. The predicted wind speed is 6 m/s, i.e. there was a noise of w = 1 m/s. The residual is 5 - 6 = -1 m/s. You correct by taking that 1 m/s off your prediction to get back to 5 m/s. Is that how the effect of the process noise is minimized?
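For reference, here is a minimal sketch of a single scalar update with those numbers (using the Q and R values from my code above); it only shows that the filter applies k times the residual rather than removing the full residual:

a, h = 1.0, 1.0
Q, R = 16.0, 9.0                                 # variances from the code above
p = Q                                            # initial covariance estimate

x_prior = a * 6.0                                # forecast (prior) = 6 m/s
z_now = 5.0                                      # measurement = 5 m/s

p_prior = a**2 * p + Q                           # = 32
k = h * p_prior / (h**2 * p_prior + R)           # = 32/41, about 0.78
x_post = x_prior + k * (z_now - h * x_prior)     # 6 + 0.78*(-1), about 5.22 m/s
p = p_prior * (1 - h * k)

print(k)                                         # ~0.78
print(x_post)                                    # ~5.22 m/s, not all the way back to 5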

3) Here is a paper that talks about applying a KF to smooth weather forecasts: http://hal.archives-ouvertes.fr/docs/00/50/59/93/PDF/Louka_etal_jweia2008.pdf. The interesting point is the statement that "as soon as the new observation y_t is known, the estimate of x at time t becomes x_{t|t} = x_{t|t-1} + K_t (y_t - H_t x_{t|t-1})". If I read this with respect to real time, it says "as soon as the new observation is known, the estimate now becomes x_t". I can see how the KF can bring the estimated data closer to the measurement in real time. But if you are correcting the forecast at t = now using the measurement data at t = now, how is that still a forecast?
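My current reading of that equation, written out as a small correct-then-predict sketch (my own paraphrase of the usual predict/correct interpretation, not code from the paper): the measurement corrects the state at time t, and the forecast for later times is then propagated forward from the corrected state.

a, h = 1.0, 1.0
Q, R = 16.0, 9.0
p, x_est = Q, 6.0          # some current state estimate

y_t = 5.0                  # new observation available now

# predict to time t (prior), then correct with y_t (filtering step, x_{t|t}):
x_prior = a * x_est
p_prior = a**2 * p + Q
k = h * p_prior / (h**2 * p_prior + R)
x_est = x_prior + k * (y_t - h * x_prior)
p = p_prior * (1 - h * k)

# the forecast for the next step is propagated from the corrected state (x_{t+1|t});
# with a = 1 the corrected value is simply carried forward:
x_next = a * x_est
print(x_next)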

Thank you.

Update 1:

4) We added a delay to the code to investigate how far ahead the bias computed from the current measurement is still useful, i.e. how large the delay can be before the Kalman-processed data stops improving R2 against the measurements compared with the unprocessed data. In this example, if the measurement is used to improve the forecast 6 time steps ahead (3 hours from now), it is still useful (R2 goes from 0.183 to 0.295). But if the measurement is used to improve the forecast a day later, it destroys the correlation (R2 drops to 0.075).
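For reference, this is roughly how the delay sweep was done (a sketch only; it wraps the Kalman loop above in a function and reuses load_x, load_z and calculate_regression from the code):

def kalman_r2(x, z, delay, a=1.0, h=1.0, Q=16.0, R=9.0):
    # same scalar Kalman loop as above, returning R2 against the measurements
    p_j = Q
    out = []
    for t, x_sample in enumerate(x):
        if t <= delay:
            x_pred = x_sample
        else:
            x_prior = a * x_sample
            residual = z[t - delay] - h * out[t - delay]
            p_prior = a**2 * p_j + Q
            k = h * p_prior / (h**2 * p_prior + R)
            x_pred = x_prior + k * residual
            p_j = p_prior * (1 - h * k)
        out.append(x_pred)
    return calculate_regression(out, z)

x, z = load_x(), load_z()
for delay in (6, 12, 48):
    print(delay)
    print(kalman_r2(x, z, delay))   # should give roughly the R2 values quoted above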


This does not fit as an answer, so I am writing it as a comment: in my opinion, you should have handled this line differently:

residual = z[t-delay] - h * x_sample_predict_list[t-delay]

Also, time series analysis can be done very easily in Matlab or R; Python on its own may not be as well developed for this. As evidence, here are the available KF packages: – Mai


Matlab - http://www.mathworks.com/help/control/ug/kalman-filtering.html R - http://stat.ethz.ch/R-manual/R-patched/library/stats/html/KalmanLike.html – Mai


If you really want to use Python, try https://github.com/pykalman/pykalman or http://arxiv.org/ftp/arxiv/papers/1204/1204.0375.pdf – Mai

Answers


I have updated my test scalar implementation without assuming a perfect measurement (R = 0), which was what had reduced the Kalman gain to the constant value 1. Now I see an improvement on the time series, with a reduced RMSE error.

#! /usr/bin/python 

import numpy as np 
import pylab 

import os 

# RMSE improved 
def main(): 

    # x = 336 data points of simulated wind speed for 7 days * 24 hours * 2 (every half an hour)
    # Imagine at time t, we will get an x_t value for t+48, i.e. 24 hours later.
    x = load_x() 

    # this is a list that will contain 336 data points of our corrected data 
    x_sample_predict_list = [] 

    # z = 336 data points for 7 days * 24 hour * 2 of actual measured wind speed (every half an hour) 
    z = load_z() 

    # Here is the setup of the scalar kalman filter 
    # reference: http://www.swarthmore.edu/NatSci/echeeve1/Ref/Kalman/ScalarKalman.html 
    # state transition matrix (we simply have a scalar) 
    # what you need to multiply the last time's state to get the newest state 
    # we get the x_t+1 = A * x_t, since we get the x_t+1 directly for simulation 
    # we will have a = 1 
    a = 1.0 

    # observation matrix 
    # what you need to multiply to the state, convert it to the same form as incoming measurement 
    # both state and measurements are wind speed, so set h = 1 
    h = 1.0 

    Q = 1.0  # expected process noise of predicted Wind Speed  
    R = 1.0  # expected measurement noise of Wind Speed 

    p_j = Q # process covariance is equal to the initial process covariance estimate 

    # Kalman gain is equal to k = hp-_j/(hp-_j + R). With perfect measurement 
    # R = 0, k reduces to k=1/h which is 1 
    k = 1.0 

    # one week data 
    # original R2 = 0.183 
    # with delay = 6, R2 = 0.295 
    # with delay = 12, R2 = 0.147 
    # with delay = 48, R2 = 0.075 
    delay = 6 

    # Kalman loop 
    for t, x_sample in enumerate(x): 

     if t <= delay:   
      # for the first day of the forecast, 
      # we don't have forecast data and measurement 
      # from a day before to do correction 
      x_sample_predict = x_sample    
     else: # t > delay
      # for a priori estimate we take x_sample as is 
      # x_sample = x^-_j = a x^-_j_1 + b u_j 
      # Inside the NWP (numerical weather prediction),
      # the x_sample should be based on x_sample_j-1 (assumption)

      x_sample_predict_prior = a * x_sample 

      # we use the measurement from t-delay (ie. could be a day ago) 
      # and forecast data from t-delay, to produce a leading residual that can be used to 
      # correct the forecast. 
      residual = z[t-delay] - h * x_sample_predict_list[t-delay] 

      p_j_prior = a**2 * p_j + Q 

      k = h * p_j_prior/(h**2 * p_j_prior + R) 

      # we update our prediction based on the residual 
      x_sample_predict = x_sample_predict_prior + k * residual 

      p_j = p_j_prior * (1 - h * k) 

      #print k 
      #print p_j_prior 
      #print p_j 
      #raw_input() 

     x_sample_predict_list.append(x_sample_predict) 

    # initial goodness of fit 
    R2_val_initial = calculate_regression(x,z) 
    R2_string_initial = "R2 original: {0:10.3f}, ".format(R2_val_initial) 
    print R2_string_initial  # R2_val_original = 0.183 

    original_RMSE = (((x-z)**2).mean())**0.5 
    print "original_RMSE" 
    print original_RMSE 
    print "\n" 

    # final goodness of fit 
    R2_val_final = calculate_regression(x_sample_predict_list,z) 
    R2_string_final = "R2 final: {0:10.3f}, ".format(R2_val_final) 
    print R2_string_final  # R2_val_final = 0.267, which is better 

    final_RMSE = (((np.array(x_sample_predict_list)-z)**2).mean())**0.5
    print "final_RMSE" 
    print final_RMSE  
    print "\n" 


    timesteps = xrange(len(x))  
    pylab.plot(timesteps,x,'r-', timesteps,z,'b:', timesteps,x_sample_predict_list,'g--') 
    pylab.xlabel('Time') 
    pylab.ylabel('Wind Speed') 
    pylab.title('Simulated Wind Speed vs Actual Wind Speed') 
    pylab.legend(('predicted','measured','kalman')) 
    pylab.show() 


def calculate_regression(x, y):   
    R2 = 0 
    A = np.array([x, np.ones(len(x))]) 
    model, resid = np.linalg.lstsq(A.T, y)[:2] 
    R2_val = 1 - resid[0]/(y.size * y.var())   
    return R2_val 

def load_x(): 
    return np.array([2, 3, 3, 5, 4, 4, 4, 5, 5, 6, 5, 7, 7, 7, 8, 8, 8, 9, 9, 10, 10, 10, 11, 11, 
    11, 10, 8, 8, 8, 8, 6, 3, 4, 5, 5, 5, 6, 5, 5, 5, 6, 5, 5, 6, 6, 7, 6, 8, 9, 10, 
    12, 11, 10, 10, 10, 11, 11, 10, 8, 8, 9, 8, 9, 9, 9, 9, 8, 9, 8, 11, 11, 11, 12, 
    12, 13, 13, 13, 13, 13, 13, 13, 14, 13, 13, 12, 13, 13, 12, 12, 13, 13, 12, 12, 
    11, 12, 12, 19, 18, 17, 15, 13, 14, 14, 14, 13, 12, 12, 12, 12, 11, 10, 10, 10, 
    10, 9, 9, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 7, 7, 8, 8, 8, 6, 5, 5, 
    5, 5, 5, 5, 6, 4, 4, 4, 6, 7, 8, 7, 7, 9, 10, 10, 9, 9, 8, 7, 5, 5, 5, 5, 5, 5, 
    5, 5, 6, 5, 5, 5, 4, 4, 6, 6, 7, 7, 7, 7, 6, 6, 5, 5, 4, 2, 2, 2, 1, 1, 1, 2, 3, 
    13, 13, 12, 11, 10, 9, 10, 10, 8, 9, 8, 7, 5, 3, 2, 2, 2, 3, 3, 4, 4, 5, 6, 6, 
    7, 7, 7, 6, 6, 6, 7, 6, 6, 5, 4, 4, 3, 3, 3, 2, 2, 1, 5, 5, 3, 2, 1, 2, 6, 7, 
    7, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 8, 8, 8, 8, 7, 7, 
    7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 11, 11, 11, 11, 10, 10, 9, 10, 10, 10, 2, 2, 
    2, 3, 1, 1, 3, 4, 5, 8, 9, 9, 9, 9, 8, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 
    7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 7, 5, 5, 5, 5, 5, 6, 5]) 

def load_z(): 
    return np.array([3, 2, 1, 1, 1, 1, 3, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 2, 1, 1, 2, 2, 2, 
    2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 4, 4, 4, 4, 5, 4, 4, 5, 5, 5, 6, 6, 
    6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 9, 10, 9, 9, 10, 10, 9, 
    9, 10, 9, 9, 10, 9, 8, 9, 9, 7, 7, 6, 7, 6, 6, 7, 7, 8, 8, 8, 8, 8, 8, 7, 6, 7, 
    8, 8, 7, 8, 9, 9, 9, 9, 10, 9, 9, 9, 8, 8, 10, 9, 10, 10, 9, 9, 9, 10, 9, 8, 7, 
    7, 7, 7, 8, 7, 6, 5, 4, 3, 5, 3, 5, 4, 4, 4, 2, 4, 3, 2, 1, 1, 2, 1, 2, 1, 4, 4, 
    4, 4, 4, 3, 3, 3, 1, 1, 1, 1, 2, 3, 3, 2, 3, 3, 3, 2, 2, 5, 4, 2, 5, 4, 1, 1, 1, 
    1, 1, 1, 1, 2, 2, 1, 1, 3, 3, 3, 3, 3, 4, 3, 4, 3, 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 
    4, 4, 5, 5, 5, 4, 3, 3, 3, 3, 3, 3, 3, 3, 1, 2, 2, 3, 3, 1, 2, 1, 1, 2, 4, 3, 1, 
    1, 2, 0, 0, 0, 2, 1, 0, 0, 2, 3, 2, 4, 4, 3, 3, 4, 5, 5, 5, 4, 5, 4, 4, 4, 5, 5, 
    4, 3, 3, 4, 4, 4, 3, 3, 3, 4, 4, 4, 5, 5, 5, 4, 5, 5, 5, 5, 6, 5, 5, 8, 9, 8, 9, 
    9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 9, 10, 9, 8, 8, 9, 8, 9, 9, 10, 9, 9, 9, 
    7, 7, 9, 8, 7, 6, 6, 5, 5, 5, 5, 3, 3, 3, 4, 6, 5, 5, 6, 5]) 

if __name__ == '__main__': main() # this avoids executing main on import your_module 
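As a side note, in the scalar case the gain settles to a constant after a few steps, so the choice of Q and R mainly sets how much of each residual gets applied. A quick side calculation (my own check, not part of the code above):

def steady_state_gain(Q, R, a=1.0, h=1.0, steps=50):
    # iterate the covariance/gain recursion until the gain stops changing
    p = Q
    k = 0.0
    for _ in range(steps):
        p_prior = a**2 * p + Q
        k = h * p_prior / (h**2 * p_prior + R)
        p = p_prior * (1 - h * k)
    return k

print(steady_state_gain(16.0, 9.0))   # ~0.71 with the question's Q and R
print(steady_state_gain(1.0, 1.0))    # ~0.62 with Q = R = 1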

In this solution, this line does not respect the scalar Kalman filter; it should have been:

residual = z[t-delay] - h * x_sample_predict_prior