2017-11-20 10 views
1

Redshift: 상관 하위 쿼리 패턴이 지원되지 않음

이 코드는 특히 효율적이거나 우아한 코드는 아닙니다. 작업 중인 모델을 테스트하기 위해 INSIDE라는 임시 테이블을 만들었고, 같은 테이블을 대상으로 아래 쿼리를 실행하려고 합니다.

-- Temporary input table for the queries in this file: a calendar date and the
-- connection count for that date (presumably one row per date -- not enforced here).
CREATE TEMP TABLE INSIDE (CONNECT_DATE DATE, DAILY_CONNECTIONS INT);

.

-- Month-end connection forecast built from the INSIDE table with nested derived tables.
-- Layers, innermost first:
--   q2: each INSIDE row plus the DAILY_CONNECTIONS of the 7 preceding days and of
--       the same 7 days shifted back one year (scalar subqueries, 0 when no row matches).
--   q3: 7-day averages, month-to-date sum, last-year remainder, days remaining in the
--       month and the full-month total, each via a subquery correlated to q2.
--   q4: adds RUN_RATE (this year's 7-day average minus last year's).
--   q5: adds the two predictions (DAN_PREDICTION, LINEAR_PREDICTION).
--   outer SELECT: differences between the predictions and against the month total.
-- The surrounding text reports that this statement fails on Redshift with
-- "This type of correlated subquery pattern is not supported" once the subqueries
-- correlated to q2 are included; the code is kept exactly as posted.
SELECT * 
, q5.DAN_PREDICTION - q5.LINEAR_PREDICTION AS PREDICTION_COMPARISON 
, q5.DAN_PREDICTION - q5.ACTUAL_MONTH_END_AMOUNT AS DAN_VARIANCE 
, q5.LINEAR_PREDICTION - q5.ACTUAL_MONTH_END_AMOUNT AS LINEAR_VARIANCE 
FROM (SELECT * 
    -- DAN_PREDICTION: month-to-date + last year's remainder + run rate * days left
    , q4.mtd + q4.last_yr_remainder + q4.run_rate * q4.days_remaining AS DAN_PREDICTION 
    -- LINEAR_PREDICTION: month-to-date + current 7-day average * days left
    , q4.mtd + q4.curr_yr_7_day * days_remaining AS LINEAR_PREDICTION 
    FROM(
SELECT 
     * 
     , q3.curr_yr_7_day - q3.last_yr_7_day AS RUN_RATE 
     FROM(
     SELECT 
      CONNECT_DATE 
     , DAILY_CONNECTIONS 
     -- Averages of the seven looked-up values from q2, divided by the literal 7
     , (cur_yr_1_prev + cur_yr_2_prev + cur_yr_3_prev + cur_yr_4_prev + cur_yr_5_prev + cur_yr_6_prev + cur_yr_7_prev)/7 AS CURR_YR_7_DAY 
     , (last_yr_1_prev + last_yr_2_prev + last_yr_3_prev + last_yr_4_prev + last_yr_5_prev + last_yr_6_prev + last_yr_7_prev)/7 AS LAST_YR_7_DAY 
     -- MTD: sum over rows in the same month and year, on or before this row's date
     , (SELECT ISNULL(SUM(ins.DAILY_CONNECTIONS), 0) 
      FROM INSIDE ins 
      WHERE DATEPART(MONTH, ins.CONNECT_DATE) = DATEPART(MONTH, q2.CONNECT_DATE) 
      AND DATEPART(YEAR, ins.CONNECT_DATE) = DATEPART(YEAR, q2.CONNECT_DATE) 
      AND ins.CONNECT_DATE <= q2.CONNECT_DATE) AS MTD 
     -- LAST_YR_REMAINDER: sum over rows in the same month of the previous year,
     -- strictly after the date one year before this row's date
     , (SELECT ISNULL(SUM(ins.DAILY_CONNECTIONS), 0) 
      FROM INSIDE ins 
      WHERE DATEPART(MONTH, ins.CONNECT_DATE) = DATEPART(MONTH, q2.CONNECT_DATE) 
      AND DATEPART(YEAR, ins.CONNECT_DATE) = DATEPART(YEAR, q2.CONNECT_DATE)-1 
      AND ins.CONNECT_DATE > DATEADD(YEAR, -1, q2.CONNECT_DATE)) AS LAST_YR_REMAINDER 
     -- DAYS_REMAINING: day number of the month's last day minus this row's day number
     , (SELECT TOP 1 DATEPART(DAY, last_day(CONNECT_DATE)) 
      FROM INSIDE 
      WHERE CONNECT_DATE = q2.CONNECT_DATE)-DATEPART(DAY, q2.CONNECT_DATE) DAYS_REMAINING 
     -- ACTUAL_MONTH_END_AMOUNT: sum over every row in the same month and year
     , (SELECT ISNULL(SUM(ins.DAILY_CONNECTIONS), 0) 
      FROM INSIDE ins 
      WHERE DATEPART(MONTH, ins.CONNECT_DATE) = DATEPART(MONTH, q2.CONNECT_DATE) 
      AND DATEPART(YEAR, ins.CONNECT_DATE) = DATEPART(YEAR, q2.CONNECT_DATE)) AS ACTUAL_MONTH_END_AMOUNT 
     FROM 
      (SELECT 
      q1.CONNECT_DATE CONNECT_DATE 
      , q1.DAILY_CONNECTIONS DAILY_CONNECTIONS 
      -- DAILY_CONNECTIONS for each of the 7 days before q1.connect_date (0 if absent)
      , ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(DAY,-1,q1.connect_date)), 0) CUR_YR_1_PREV 
      , ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(DAY,-2,q1.connect_date)), 0) CUR_YR_2_PREV 
      , ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(DAY,-3,q1.connect_date)), 0) CUR_YR_3_PREV 
      , ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(DAY,-4,q1.connect_date)), 0) CUR_YR_4_PREV 
      , ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(DAY,-5,q1.connect_date)), 0) CUR_YR_5_PREV 
      , ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(DAY,-6,q1.connect_date)), 0) CUR_YR_6_PREV 
      , ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(DAY,-7,q1.connect_date)), 0) CUR_YR_7_PREV 
      -- The same 7 look-ups, each shifted back one further year (0 if absent)
      , ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(YEAR, -1,(DATEADD(DAY,-1,q1.connect_date)))), 0) LAST_YR_1_PREV 
      , ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(YEAR, -1,(DATEADD(DAY,-2,q1.connect_date)))), 0) LAST_YR_2_PREV 
      , ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(YEAR, -1,(DATEADD(DAY,-3,q1.connect_date)))), 0) LAST_YR_3_PREV 
      , ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(YEAR, -1,(DATEADD(DAY,-4,q1.connect_date)))), 0) LAST_YR_4_PREV 
      , ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(YEAR, -1,(DATEADD(DAY,-5,q1.connect_date)))), 0) LAST_YR_5_PREV 
      , ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(YEAR, -1,(DATEADD(DAY,-6,q1.connect_date)))), 0) LAST_YR_6_PREV 
      , ISNULL((SELECT DAILY_CONNECTIONS FROM INSIDE WHERE CONNECT_DATE = DATEADD(YEAR, -1,(DATEADD(DAY,-7,q1.connect_date)))), 0) LAST_YR_7_PREV 
      FROM INSIDE q1 ORDER BY q1.CONNECT_DATE 
     ) q2 ORDER BY q2.connect_date 
    ) q3 
    ) q4 
) q5 

내부 q1 쿼리만 실행하면 정상적으로 작동하는 것 같습니다. 문제는 q2를 참조하는 하위 쿼리를 실행할 때 시작됩니다. Redshift에서 지원되지 않는 하위 쿼리 유형에 대한 문서를 검토해 보았지만, 이 쿼리가 어떤 규칙에 걸리는지 이해하지 못했습니다.

Amazon Invalid operation: This type of correlated subquery pattern is not supported due to internal error;

이 하위 쿼리들(MTD, LAST_YR_REMAINDER 등) 중 하나 이상을 한 번에 실행하면 위와 같은 오류가 발생하며 쿼리가 실패합니다. 어떤 도움이라도 대단히 감사하겠습니다.

+0

http://docs.aws.amazon.com/redshift/latest/dg/r_correlated_subqueries.html 문서를 이미 보신 것 같은데, 거기서 사용할 수 없다고 설명하는 패턴 중 하나에 걸린 것으로 보입니다. 이 쿼리는 다른, 아마도 더 좋은 방법으로 다시 작성할 수 있다고 생각합니다. 하고 계신 작업의 "논리"와 예상되는 결과, 그리고 몇 가지 샘플 데이터를 포함하도록 질문을 업데이트해 주실 수 있습니까? –

+0

샘플 데이터: 'connect_date \t daily_connections' / '2016년 5월 20일 \t 867'. 해당 일자 기준 지난 7일 동안의 평균 연결 수와 작년 같은 7일 동안의 평균 사이의 차이를 구해 전년 대비 올해의 실행 속도(run rate)를 계산합니다. 그런 다음 해당 달의 현재까지 연결 수를 작년 같은 달의 나머지 연결 수에 더하고, 여기에 실행 속도와 연결 날짜가 속한 달의 남은 일 수를 곱한 값을 더합니다. 마지막 단계(q5)는 그 결과를 몇 가지 다른 값과 비교하는 것입니다. –

+0

나중에 다시 살펴보겠습니다. 댓글에 쓰신 내용으로 질문을 업데이트해 주십시오. 이유: 다른 사람이 댓글을 읽지 않고도 따라갈 수 있도록 질문을 가능한 한 완전하게 작성하는 것이 중요합니다. –

답변

0

인라인 서브쿼리가 너무 많습니다. Redshift가 효율적으로 실행할 수 있는 형태로 논리를 분해하려면 공통 테이블 식(CTE)을 사용해 보십시오.

대부분의 인라인 하위 쿼리는 카티전 곱(cartesian product)에 대한 집계로 다시 작성할 수 있습니다.

-- Month-end connection forecast for every day in INSIDE, written without
-- correlated subqueries: each day (i1) is self-joined to the other days it needs
-- (i2) and the individual values are picked out with conditional aggregates.
--
-- Output columns, in order: connect_date, daily_connections, curr_yr_7_day,
-- last_yr_7_day, mtd, last_yr_remainder, days_remaining, actual_month_end_amount,
-- run_rate, dan_prediction, linear_prediction, prediction_comparison,
-- dan_variance, linear_variance.
WITH daily_window AS (
    -- One output row per (connect_date, daily_connections) of i1.
    -- Every i2 column is qualified: both sides of the self-join expose the same
    -- column names, so an unqualified reference would be ambiguous.
    -- COALESCE(..., 0) keeps a missing day from turning a whole sum into NULL.
    SELECT
        i1.connect_date AS connect_date,
        i1.daily_connections AS daily_connections,
        -- Connections on each of the 7 days before connect_date.
        COALESCE(MAX(CASE WHEN i2.connect_date = DATEADD(DAY, -1, i1.connect_date) THEN i2.daily_connections END), 0) AS cur_yr_1_prev,
        COALESCE(MAX(CASE WHEN i2.connect_date = DATEADD(DAY, -2, i1.connect_date) THEN i2.daily_connections END), 0) AS cur_yr_2_prev,
        COALESCE(MAX(CASE WHEN i2.connect_date = DATEADD(DAY, -3, i1.connect_date) THEN i2.daily_connections END), 0) AS cur_yr_3_prev,
        COALESCE(MAX(CASE WHEN i2.connect_date = DATEADD(DAY, -4, i1.connect_date) THEN i2.daily_connections END), 0) AS cur_yr_4_prev,
        COALESCE(MAX(CASE WHEN i2.connect_date = DATEADD(DAY, -5, i1.connect_date) THEN i2.daily_connections END), 0) AS cur_yr_5_prev,
        COALESCE(MAX(CASE WHEN i2.connect_date = DATEADD(DAY, -6, i1.connect_date) THEN i2.daily_connections END), 0) AS cur_yr_6_prev,
        COALESCE(MAX(CASE WHEN i2.connect_date = DATEADD(DAY, -7, i1.connect_date) THEN i2.daily_connections END), 0) AS cur_yr_7_prev,
        -- The same 7 days shifted back one year.
        COALESCE(MAX(CASE WHEN i2.connect_date = DATEADD(YEAR, -1, DATEADD(DAY, -1, i1.connect_date)) THEN i2.daily_connections END), 0) AS last_yr_1_prev,
        COALESCE(MAX(CASE WHEN i2.connect_date = DATEADD(YEAR, -1, DATEADD(DAY, -2, i1.connect_date)) THEN i2.daily_connections END), 0) AS last_yr_2_prev,
        COALESCE(MAX(CASE WHEN i2.connect_date = DATEADD(YEAR, -1, DATEADD(DAY, -3, i1.connect_date)) THEN i2.daily_connections END), 0) AS last_yr_3_prev,
        COALESCE(MAX(CASE WHEN i2.connect_date = DATEADD(YEAR, -1, DATEADD(DAY, -4, i1.connect_date)) THEN i2.daily_connections END), 0) AS last_yr_4_prev,
        COALESCE(MAX(CASE WHEN i2.connect_date = DATEADD(YEAR, -1, DATEADD(DAY, -5, i1.connect_date)) THEN i2.daily_connections END), 0) AS last_yr_5_prev,
        COALESCE(MAX(CASE WHEN i2.connect_date = DATEADD(YEAR, -1, DATEADD(DAY, -6, i1.connect_date)) THEN i2.daily_connections END), 0) AS last_yr_6_prev,
        COALESCE(MAX(CASE WHEN i2.connect_date = DATEADD(YEAR, -1, DATEADD(DAY, -7, i1.connect_date)) THEN i2.daily_connections END), 0) AS last_yr_7_prev,
        -- Month-to-date: same month and year, on or before connect_date.
        COALESCE(SUM(CASE
            WHEN DATEPART(MONTH, i2.connect_date) = DATEPART(MONTH, i1.connect_date)
                AND DATEPART(YEAR, i2.connect_date) = DATEPART(YEAR, i1.connect_date)
                AND i2.connect_date <= i1.connect_date
                THEN i2.daily_connections
        END), 0) AS mtd,
        -- Last year's same month, strictly after the date one year before connect_date.
        COALESCE(SUM(CASE
            WHEN DATEPART(MONTH, i2.connect_date) = DATEPART(MONTH, i1.connect_date)
                AND DATEPART(YEAR, i2.connect_date) = DATEPART(YEAR, i1.connect_date) - 1
                AND i2.connect_date > DATEADD(YEAR, -1, i1.connect_date)
                THEN i2.daily_connections
        END), 0) AS last_yr_remainder,
        -- Full total of connect_date's own month and year.
        COALESCE(SUM(CASE
            WHEN DATEPART(MONTH, i2.connect_date) = DATEPART(MONTH, i1.connect_date)
                AND DATEPART(YEAR, i2.connect_date) = DATEPART(YEAR, i1.connect_date)
                THEN i2.daily_connections
        END), 0) AS actual_month_end_amount
    FROM INSIDE AS i1
    -- Bounded self-join instead of a full cartesian product. The earliest row any
    -- expression above can match is (connect_date - 7 days) - 1 year, and the
    -- latest is the last day of connect_date's month. i1's own row always falls
    -- inside this range, so no i1 row is lost by the join.
    INNER JOIN INSIDE AS i2
        ON i2.connect_date >= DATEADD(YEAR, -1, DATEADD(DAY, -7, i1.connect_date))
        AND i2.connect_date <= LAST_DAY(i1.connect_date)
    -- Collapse the join back to one row per i1 row.
    GROUP BY
        i1.connect_date,
        i1.daily_connections
),

rolling_averages AS (
    SELECT
        connect_date,
        daily_connections,
        -- NOTE(review): the operands derive from an INT column, so "/ 7" is
        -- presumably integer division, exactly as in the nested query this
        -- replaces -- confirm whether a fractional average is wanted.
        (cur_yr_1_prev + cur_yr_2_prev + cur_yr_3_prev + cur_yr_4_prev
            + cur_yr_5_prev + cur_yr_6_prev + cur_yr_7_prev) / 7 AS curr_yr_7_day,
        (last_yr_1_prev + last_yr_2_prev + last_yr_3_prev + last_yr_4_prev
            + last_yr_5_prev + last_yr_6_prev + last_yr_7_prev) / 7 AS last_yr_7_day,
        mtd,
        last_yr_remainder,
        -- Days left in the month: day number of the month's last day minus
        -- connect_date's day number. Depends only on connect_date, so it is
        -- computed here rather than looked up through the self-join.
        DATEPART(DAY, LAST_DAY(connect_date)) - DATEPART(DAY, connect_date) AS days_remaining,
        actual_month_end_amount
    FROM daily_window
),

run_rates AS (
    -- Year-over-year change in the 7-day average.
    SELECT
        connect_date,
        daily_connections,
        curr_yr_7_day,
        last_yr_7_day,
        mtd,
        last_yr_remainder,
        days_remaining,
        actual_month_end_amount,
        curr_yr_7_day - last_yr_7_day AS run_rate
    FROM rolling_averages
),

predictions AS (
    SELECT
        connect_date,
        daily_connections,
        curr_yr_7_day,
        last_yr_7_day,
        mtd,
        last_yr_remainder,
        days_remaining,
        actual_month_end_amount,
        run_rate,
        mtd + last_yr_remainder + run_rate * days_remaining AS dan_prediction,
        mtd + curr_yr_7_day * days_remaining AS linear_prediction
    FROM run_rates
)

SELECT
    connect_date,
    daily_connections,
    curr_yr_7_day,
    last_yr_7_day,
    mtd,
    last_yr_remainder,
    days_remaining,
    actual_month_end_amount,
    run_rate,
    dan_prediction,
    linear_prediction,
    dan_prediction - linear_prediction AS prediction_comparison,
    dan_prediction - actual_month_end_amount AS dan_variance,
    linear_prediction - actual_month_end_amount AS linear_variance
FROM predictions
-- Ordering belongs on the outermost SELECT; an ORDER BY inside a CTE does not
-- determine the order of the final result.
ORDER BY connect_date