SQL_튜닝_13

소트연산

DB에서 소트 연산은 사용자에게 보여줄 데이터 셋을 최종적으로 정렬하는 연산을 말합니다 우리가 흔히 쿼리에서 ORDER BY에 의한 ASC , DESC 가 여기에 해당되고 사용자에게 보여줄 최종 데이터 셋을 정렬하다 보니 자원을 상당히 많이 잡아먹는 요소이기도 합니다

인덱스와 소트연산

인덱스를 설정하면 인덱스 테이블은 기본적으로 정렬이 되어 있습니다 그렇기 때문에 Index Range Scan 을 사용할 수 있고 부차적으로 얻을 수 있는 것이 소트 연산 생략입니다 이번에는 소트 연산이 쿼리 성능에 미치는 영향으로 이야기를 풀어나가려고 합니다

java 소스예제

https://gitlab.com/kimdongy1000/sqltuningproject

이곳에서 Ex4_custom 를 찾자 아래 .sql 파일도 스키마도 마련해두었습니다

간단한 테이블 스키마

DROP TABLE Ex4_Custom 

CREATE TABLE Ex4_Custom(
	
	customApplyDate varchar(8), 
	customName      varchar(11),
	customSex       varchar(2),
	customAge       number(2)
	
)

CREATE INDEX Ex4_Custom_idx ON Ex4_Custom (customApplyDate , customName , customSex , customAge);

java 소스

package com.example.sqltuningproject.controller;

import org.mybatis.spring.SqlSessionTemplate;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.*;

@RestController
@RequestMapping("/ex4_custom")
public class Ex4_Custom {

    private static final String[] firstNames = {"김", "이", "박", "최", "정", "강", "조", "윤", "장", "임"};
    private static final String[] middleNames = {"민", "서", "지", "현", "승", "예", "영", "우", "준", "석"};
    private static final String[] lastNames = {"호", "빈", "희", "아", "수", "진", "연", "혁", "기", "찬"};

    @Autowired
    private SqlSessionTemplate sqlSessionTemplate;

    @GetMapping("/create")
    public String create()
    {

        List<Ex4_Custom_Model> customs = new ArrayList<>();
        Set<String> dateSet = new HashSet<>();
        Random random = new Random();
        Random sexRandom = new Random();
        Random ageRandom = new Random();

        // 시작 날짜 및 종료 날짜 설정
        LocalDate startDate = LocalDate.of(1900, 1, 1);
        LocalDate endDate = LocalDate.of(2024, 12, 31);

        // 총 일 수 계산 (1900-01-01 ~ 2024-12-31)
        int totalDays = (int) (endDate.toEpochDay() - startDate.toEpochDay()) + 1;
        DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyyMMdd");

        // 중복되지 않는 랜덤 날짜 생성
        while (dateSet.size() < 10000) {
            int randomDayOffset = random.nextInt(totalDays); // 0 ~ totalDays-1 범위의 랜덤 값
            LocalDate randomDate = startDate.plusDays(randomDayOffset);
            //System.out.println(randomDate.format(formatter));
            dateSet.add(randomDate.format(formatter)); // yyyyMMdd 형태로 저장
        }

        for (String date : dateSet) {
            String name = generateRandomName(random);
            String sex = "M";

            int rndSexInd = sexRandom.nextInt(2);
            int rndAgeInd = ageRandom.nextInt(80);

            if(rndSexInd % 2 == 0) {
                sex = "W";
            }

            sqlSessionTemplate.insert(this.getClass().getName() + ".create", new Ex4_Custom_Model(date , name , sex , rndAgeInd));

        }




        return "ok";

    }

    private static String generateRandomName(Random random) {
        return firstNames[random.nextInt(firstNames.length)] +
                middleNames[random.nextInt(middleNames.length)] +
                lastNames[random.nextInt(lastNames.length)];
    }

}



class Ex4_Custom_Model{

    private final String customApplyDate;
    private final String customName;
    private final String customSex;
    private final int customAge;

    public Ex4_Custom_Model(String customApplyDate, String customName, String customSex, int customAge) {
        this.customApplyDate = customApplyDate;
        this.customName = customName;
        this.customSex = customSex;
        this.customAge = customAge;
    }

    @Override
    public String toString() {
        return String.format("INSERT INTO ex4_Custom (customApplyDate, customName, customSex, customAge) VALUES (%s, %s, %s, '%d');",
                customApplyDate, customName, customSex, customAge);
    }
}

해당 소스는 // 총 일 수 계산 (1900-01-01 ~ 2024-12-31) 날짜 데이터를 가지고 손님의 랜덤 한 이름과 랜덤 한 성별 랜덤 한 나이를 insert 하는 소스입니다 총 1만 개의 데이터를 생성했습니다

Order By 생략

EXPLAIN PLAN FOR
SELECT * 
FROM Ex4_Custom
WHERE customApplyDate > '20010101'

SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY);

PLAN_TABLE_OUTPUT                                                                  |
-----------------------------------------------------------------------------------+
Plan hash value: 4234916565                                                        |
                                                                                   |
-----------------------------------------------------------------------------------|
| Id  | Operation        | Name           | Rows  | Bytes | Cost (%CPU)| Time     ||
-----------------------------------------------------------------------------------|
|   0 | SELECT STATEMENT |                |  1862 | 44688 |    13   (0)| 00:00:01 ||
|*  1 |  INDEX RANGE SCAN| EX4_CUSTOM_IDX |  1862 | 44688 |    13   (0)| 00:00:01 ||
-----------------------------------------------------------------------------------|
                                                                                   |
Predicate Information (identified by operation id):                                |
---------------------------------------------------                                |
                                                                                   |
   1 - access("CUSTOMAPPLYDATE">'20010101' AND "CUSTOMAPPLYDATE" IS NOT            |
              NULL)                                                                |

이 쿼리의 결과를 보게 되면 우리는 따로 Order by를 사용하지 않아도 데이터가 customApplyDate 별로 정렬이 되어 있는 것을 확인할 수 있습니다 이는 위에서도 설명했지만 인덱스 테이블 그 자체가 이미 정렬되어 있는 상태이기 때문에 우리가 따로 정렬 문구를 작성하지 않더라도 범위 스캔들 한 그대로 우리에게 결과 셋을 전달해 줍니다

Order By 추가

EXPLAIN PLAN FOR
SELECT * 
FROM Ex4_Custom
WHERE customApplyDate > '20010101'
ORDER BY customApplyDate

SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY);

PLAN_TABLE_OUTPUT                                                                  |
-----------------------------------------------------------------------------------+
Plan hash value: 4234916565                                                        |
                                                                                   |
-----------------------------------------------------------------------------------|
| Id  | Operation        | Name           | Rows  | Bytes | Cost (%CPU)| Time     ||
-----------------------------------------------------------------------------------|
|   0 | SELECT STATEMENT |                |  1862 | 44688 |    13   (0)| 00:00:01 ||
|*  1 |  INDEX RANGE SCAN| EX4_CUSTOM_IDX |  1862 | 44688 |    13   (0)| 00:00:01 ||
-----------------------------------------------------------------------------------|
                                                                                   |
Predicate Information (identified by operation id):                                |
---------------------------------------------------                                |
                                                                                   |
   1 - access("CUSTOMAPPLYDATE">'20010101' AND "CUSTOMAPPLYDATE" IS NOT            |
              NULL)                                                                |

그렇기에 우리가 인위적으로 order by를 넣어라도 그것이 인덱스 선두 칼럼이라면 옵티마이저는 굳이 소트 연산을 하지 않습니다

인덱스가 없는 경우

EXPLAIN PLAN FOR
SELECT * 
FROM Ex4_Custom
WHERE customApplyDate > '20010101'
ORDER BY customApplyDate 

SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY);

PLAN_TABLE_OUTPUT                                                                |
---------------------------------------------------------------------------------+
Plan hash value: 3638973679                                                      |
                                                                                 |
---------------------------------------------------------------------------------|
| Id  | Operation          | Name       | Rows  | Bytes | Cost (%CPU)| Time     ||
---------------------------------------------------------------------------------|
|   0 | SELECT STATEMENT   |            |  1862 | 44688 |    14   (8)| 00:00:01 ||
|   1 |  SORT ORDER BY     |            |  1862 | 44688 |    14   (8)| 00:00:01 ||
|*  2 |   TABLE ACCESS FULL| EX4_CUSTOM |  1862 | 44688 |    13   (0)| 00:00:01 ||
---------------------------------------------------------------------------------|
                                                                                 |
Predicate Information (identified by operation id):                              |
---------------------------------------------------                              |
                                                                                 |
   2 - filter("CUSTOMAPPLYDATE">'20010101')                                      |

만약 인덱스가 없는 경우라면 Index Range Scan 을 할 수 없을 뿐만 아니라 SORT ORDER BY 연산도 실행을 하게 됩니다

인덱스가 있는 경우

EXPLAIN PLAN FOR
SELECT * 
FROM Ex4_Custom
WHERE CUSTOMAGE > '20'
ORDER BY customApplyDate 

SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY);

PLAN_TABLE_OUTPUT                                                                |
---------------------------------------------------------------------------------+
Plan hash value: 3638973679                                                      |
                                                                                 |
---------------------------------------------------------------------------------|
| Id  | Operation          | Name       | Rows  | Bytes | Cost (%CPU)| Time     ||
---------------------------------------------------------------------------------|
|   0 | SELECT STATEMENT   |            |  7468 |   175K|    14   (8)| 00:00:01 ||
|   1 |  SORT ORDER BY     |            |  7468 |   175K|    14   (8)| 00:00:01 ||
|*  2 |   TABLE ACCESS FULL| EX4_CUSTOM |  7468 |   175K|    13   (0)| 00:00:01 ||
---------------------------------------------------------------------------------|
                                                                                 |
Predicate Information (identified by operation id):                              |
---------------------------------------------------                              |
                                                                                 |
   2 - filter("CUSTOMAGE">20)                                                    |

하지만 인덱스가 있더라도 선두 컬럼이이 WHERE 절에 포함이 되어 있지 않는다면 마찬가지로 SORT 연산을 진행하게 됩니다

인덱스 선두컬럼 내림차순

EXPLAIN PLAN FOR
SELECT * 
FROM Ex4_Custom
WHERE customApplyDate > '20010101'
AND CUSTOMAGE > '20' 
ORDER BY customApplyDate DESC 

SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY);

PLAN_TABLE_OUTPUT                                                                             |
----------------------------------------------------------------------------------------------+
Plan hash value: 4257121016                                                                   |
                                                                                              |
----------------------------------------------------------------------------------------------|
| Id  | Operation                   | Name           | Rows  | Bytes | Cost (%CPU)| Time     ||
----------------------------------------------------------------------------------------------|
|   0 | SELECT STATEMENT            |                |  1391 | 33384 |    11   (0)| 00:00:01 ||
|*  1 |  INDEX RANGE SCAN DESCENDING| EX4_CUSTOM_IDX |  1391 | 33384 |    11   (0)| 00:00:01 ||
----------------------------------------------------------------------------------------------|
                                                                                              |
Predicate Information (identified by operation id):                                           |
---------------------------------------------------                                           |
                                                                                              |
   1 - access("CUSTOMAPPLYDATE">'20010101' AND "CUSTOMAGE">20 AND "CUSTOMAPPLYDATE"           |
              IS NOT NULL)                                                                    |
       filter("CUSTOMAGE">20)                                                                 |

인덱스 선두 컬럼 내림차순은 실행계획 오퍼레이션이 다음과 같이 보이게 됩니다 INDEX RANGE SCAN DESCENDING 이는 그냥 INDEX RANGE Scan 단계에 내림차순(DESCENDING) 이 포함된 단계일 뿐입니다

ORDER BY 절에서의 컬럼가공

우리는 이제까지 WHERE 절의 인덱스에 컬럼 가공이 발생하면 인덱스를 못 타는 경우를 보았는데 이는 ORDER BY 절 또는 SELECT 절에서 가공을 하는 경우 발생하게 됩니다

EXPLAIN PLAN FOR
SELECT * 
FROM Ex4_Custom
WHERE customApplyDate > '20010101'
AND CUSTOMAGE > '20' 
ORDER BY TO_DATE(customApplyDate , 'yyyymmdd') DESC

------------------------------------------------------------------------------------|
| Id  | Operation         | Name           | Rows  | Bytes | Cost (%CPU)| Time     ||
------------------------------------------------------------------------------------|
|   0 | SELECT STATEMENT  |                |  1391 | 33384 |    12   (9)| 00:00:01 ||
|   1 |  SORT ORDER BY    |                |  1391 | 33384 |    12   (9)| 00:00:01 ||
|*  2 |   INDEX RANGE SCAN| EX4_CUSTOM_IDX |  1391 | 33384 |    11   (0)| 00:00:01 ||
------------------------------------------------------------------------------------|
                                                                                    |
Predicate Information (identified by operation id):                                 |
---------------------------------------------------                                 |
                                                                                    |
   2 - access("CUSTOMAPPLYDATE">'20010101' AND "CUSTOMAGE">20 AND                   |
              "CUSTOMAPPLYDATE" IS NOT NULL)                                        |
       filter("CUSTOMAGE">20)                                                       |

이 경우 현재 인덱스 선두 컬럼 order by 절에서 컬럼 정렬을 DATE 타입으로 변경하고 정렬을 원하고 있습니다 그렇기에 옵티마이저는 순수 customApplyDate 컬럼 데이터로 결과 셋을 만들었지만 사용자의 요청으로 인해서 TO_DATE(customApplyDate , ‘yyyymmdd’) 작업을 진행해서 다시 정렬을 하게 됩니다 그렇기에 이는 Index Rang Scan에 의한 소트 연산 생략이 불가능한 경우입니다

EXPLAIN PLAN FOR
SELECT 	TO_DATE(customApplyDate , 'yyyymmdd') customApplyDate, 
		customName,
		customSex ,
		customAge
FROM Ex4_Custom
WHERE customApplyDate > '20010101'
AND CUSTOMAGE > '20' 
ORDER BY customApplyDate  DESC

PLAN_TABLE_OUTPUT                                                                   |
------------------------------------------------------------------------------------+
Plan hash value: 1886150853                                                         |
                                                                                    |
------------------------------------------------------------------------------------|
| Id  | Operation         | Name           | Rows  | Bytes | Cost (%CPU)| Time     ||
------------------------------------------------------------------------------------|
|   0 | SELECT STATEMENT  |                |  1391 | 33384 |    12   (9)| 00:00:01 ||
|   1 |  SORT ORDER BY    |                |  1391 | 33384 |    12   (9)| 00:00:01 ||
|*  2 |   INDEX RANGE SCAN| EX4_CUSTOM_IDX |  1391 | 33384 |    11   (0)| 00:00:01 ||
------------------------------------------------------------------------------------|
                                                                                    |
Predicate Information (identified by operation id):                                 |
---------------------------------------------------                                 |

이 경우에는 함정에 빠질 수 있으나 쿼리가 동작하는 원리를 잘 생각하면 왜 SORT 연산이 생략이 안되었는지 알게 된다 쿼리의 결과 셋은 다음의 문구 순으로 만들어집니다 WHERE -> GROUP BY -> HAVING -> SELECT -> ORDER BY 이 순서로 만들어집니다 즉 이 쿼리에서 order by의 customApplyDate는 Ex4_Custom 테이블의 순수 컬럼인 customApplyDate 이 아닌 현재 TO_DATE(customApplyDate , ‘yyyymmdd’) 이 부분의 alias를 지칭하게 됩니다 그렇기에 옵티마이저는 위의 쿼리와 마찬가지로 소트 연산이 필요하다고 생각해서 추가를 하게 됩니다

EXPLAIN PLAN FOR
SELECT 	TO_DATE(A.customApplyDate , 'yyyymmdd') customApplyDate, 
		A.customName,
		A.customSex ,
		A.customAge
FROM Ex4_Custom A
WHERE A.customApplyDate > '20010101'
AND A.CUSTOMAGE > '20' 
ORDER BY A.customApplyDate  DESC 

PLAN_TABLE_OUTPUT                                                                             |
----------------------------------------------------------------------------------------------+
Plan hash value: 4257121016                                                                   |
                                                                                              |
----------------------------------------------------------------------------------------------|
| Id  | Operation                   | Name           | Rows  | Bytes | Cost (%CPU)| Time     ||
----------------------------------------------------------------------------------------------|
|   0 | SELECT STATEMENT            |                |  1391 | 33384 |    11   (0)| 00:00:01 ||
|*  1 |  INDEX RANGE SCAN DESCENDING| EX4_CUSTOM_IDX |  1391 | 33384 |    11   (0)| 00:00:01 ||
----------------------------------------------------------------------------------------------|
                                                                                              |
Predicate Information (identified by operation id):                                           |
---------------------------------------------------                                           |
                                                                                              |
   1 - access("A"."CUSTOMAPPLYDATE">'20010101' AND "A"."CUSTOMAGE">20 AND                     |
              "A"."CUSTOMAPPLYDATE" IS NOT NULL)                                              |
       filter("A"."CUSTOMAGE">20)                                                             |

만약 이런 경우라면 이때는 옵티마이저가 A.customApplyDate 가 순수 인덱스에 있는 테이블을 가리키는 것을 명시적으로 알 수 있기에 지금처럼 소트 연산을 생략하게 됩니다