SQL_튜닝_11

계속해서 인덱스를 타지 못하는 다음 경우를 보겠습니다 앞에서는 선두 컬럼 자체를 가공한 예제하면 다음은 앞 시간까지 복습을 해보면 인덱스 범위 스캔을 사용하려면 인덱스 선두 컬럼을 where 절에 사용을 해야 하고 반드시 선두 컬럼에 대해서는 컬럼 가공이 있어서는 안되었습니다 이번 시간에는 OR에 대해서 공부를 해볼 것입니다

java 소스예제

https://gitlab.com/kimdongy1000/sqltuningproject

이곳에서 Ex3_custom 를 찾자 아래 .sql 파일도 스키마도 마련해두었습니다

간단한 예제 테이블 스키마

DROP TABLE Ex3_Custom 

CREATE TABLE Ex3_Custom(

    customName varchar(10), 
    customTelNumber varchar(11)
	
)

CREATE UNIQUE INDEX ex3_custom_pk_idx ON Ex3_Custom (customName , customTelNumber);
ALTER TABLE ex3_custom ADD CONSTRAINT pk_ex3_custom PRIMARY KEY (customName , customTelNumber) USING INDEX ex3_custom_pk_idx;

테이블 구조는 다음과 같습니다 customName 과 customTelNumber 을 인덱스로 만들고 두 개의 컬럼을 PK로 두겠습니다

java 소스

package com.example.sqltuningproject.controller;

import org.mybatis.spring.SqlSessionTemplate;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

import java.util.*;

@RestController
@RequestMapping("/ex3_custom")
public class Ex3_Custom {

    private static final String[] firstNames = {"김", "이", "박", "최", "정", "강", "조", "윤", "장", "임"};
    private static final String[] middleNames = {"민", "서", "지", "현", "승", "예", "영", "우", "준", "석"};
    private static final String[] lastNames = {"호", "빈", "희", "아", "수", "진", "연", "혁", "기", "찬"};

    private static final int TOTAL_NUMBERS = 500; // 생성할 전화번호 개수
    private static final String PREFIX = "010";  // 앞자리 고정
    private static final int PHONE_LENGTH = 8;   // 랜덤 생성할 숫자 자리 수

    @Autowired
    private SqlSessionTemplate sqlSessionTemplate;

    @GetMapping("/create")

    public String create()
    {
        List<Custom> customs = new ArrayList<>();
        Set<String> phoneNumbers = new HashSet<>();
        Random random = new Random();



        while (phoneNumbers.size() < TOTAL_NUMBERS) {

            String randomDigits = String.format("%08d", random.nextInt(100_000_000));
            String phoneNumber = PREFIX + randomDigits;

            phoneNumbers.add(phoneNumber);
        }

        for (String phoneNumber : phoneNumbers) {
            String name = generateRandomName(random);
            customs.add(new Custom(name, phoneNumber));
        }

        for (Custom custom : customs) {
            sqlSessionTemplate.insert(this.getClass().getName() + ".create", custom);
        }

        return "OK";


    }

    private static String generateRandomName(Random random) {
        return firstNames[random.nextInt(firstNames.length)] +
                middleNames[random.nextInt(middleNames.length)] +
                lastNames[random.nextInt(lastNames.length)];
    }
}

class Custom{

    private final String customName;
    private final String customTelNumber;

    public Custom(String customName, String customTelNumber) {
        this.customName = customName;
        this.customTelNumber = customTelNumber;
    }

    @Override
    public String toString() {
        return String.format("INSERT INTO ex3_custom (customName, customTelNumber) VALUES (%d, %d);", customName , customTelNumber);

    }
}

해당 java 소스는 총 500명의 데이터를 만들어내는데 이때 고객의 이름과 전화번호를 생성해서 insert 하게 됩니다

OR 은 Index Range Scan 을 할 수 있을까?

EXPLAIN PLAN FOR
SELECT * FROM Ex3_Custom
WHERE (customName = '이민빈' OR customTelNumber = '01057564495')

SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY);


PLAN_TABLE_OUTPUT                                                               |
--------------------------------------------------------------------------------+
Plan hash value: 2013170750                                                     |
                                                                                |
--------------------------------------------------------------------------------|
| Id  | Operation         | Name       | Rows  | Bytes | Cost (%CPU)| Time     ||
--------------------------------------------------------------------------------|
|   0 | SELECT STATEMENT  |            |     3 |    42 |     3   (0)| 00:00:01 ||
|*  1 |  TABLE ACCESS FULL| EX3_CUSTOM |     3 |    42 |     3   (0)| 00:00:01 ||
--------------------------------------------------------------------------------|
                                                                                |
Predicate Information (identified by operation id):                             |
---------------------------------------------------                             |
                                                                                |
   1 - filter("CUSTOMNAME"='이민빈' OR "CUSTOMTELNUMBER"='01057564495')            |
                                                                                |
Note                                                                            |
-----                                                                           |
   - dynamic statistics used: dynamic sampling (level=2)                        |

이러한 쿼리에서는 옵티마이저는 어떠한 인덱스 또는 힌트를 주더라도 범위 스캔을 할 수 없습니다 그 이유는 다음과 같습니다

예를 들어 B-Tree 가 이렇게 정렬이 되어 있다고 할 때 현재 index는 customName , customTelNumber이기 때문에 customName 먼저 정렬이 된 상태에서 customTelNumber를 찾게 됩니다 즉 선두 컬럼인 CUSTOMNAME = ‘이민빈’ 에 대해서 정렬을 하게 되고 그에 따라서 이민빈을 찾게 되지만 뒤에 따라오는 CUSTOMTELNUMBER는 CUSTOMNAME에 의해서 정렬된 상태로 연속된 데이터를 가지기 때문에 지금처럼 완전히 떨어진 곳에 있는 데이터를 찾으러 가야 하기 때문에 옵티마이저는 Index Range Scan 을 사용할 수 없고 Table Full Scan 을 사용하게 됩니다

OR 은 무조건 Index 범위 스캔이 안되냐

EXPLAIN PLAN FOR
SELECT * FROM Ex3_Custom
WHERE (customName = '박승아' OR customName = '이민빈')

SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY);

PLAN_TABLE_OUTPUT                                                                      |
---------------------------------------------------------------------------------------+
Plan hash value: 3550730589                                                            |
                                                                                       |
---------------------------------------------------------------------------------------|
| Id  | Operation         | Name              | Rows  | Bytes | Cost (%CPU)| Time     ||
---------------------------------------------------------------------------------------|
|   0 | SELECT STATEMENT  |                   |     4 |    56 |     1   (0)| 00:00:01 ||
|   1 |  INLIST ITERATOR  |                   |       |       |            |          ||
|*  2 |   INDEX RANGE SCAN| EX3_CUSTOM_PK_IDX |     4 |    56 |     1   (0)| 00:00:01 ||
---------------------------------------------------------------------------------------|
                                                                                       |
Predicate Information (identified by operation id):                                    |
---------------------------------------------------                                    |
                                                                                       |
   2 - access("CUSTOMNAME"='박승아' OR "CUSTOMNAME"='이민빈')                                
                                                                                       |
Note                                                                                   |
-----                                                                                  |
   - dynamic statistics used: dynamic sampling (level=2)                               |

이 경우는 당연하게도 CUSTOMNAME으로 정렬된 상태이기 때문에 OR로 연결을 할지라도 인덱스 범위 스캔을 사용할 있습니다 만약 지금처럼 서로 다른 2개의 컬럼을 사용한다면 union all 을 사용하면 됩니다

EXPLAIN PLAN FOR
SELECT * FROM Ex3_Custom
WHERE customName = '이민빈' 
UNION ALL 
SELECT * FROM Ex3_Custom
WHERE customTelNumber = '01057564495' 


SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY);

PLAN_TABLE_OUTPUT                                                                       |
----------------------------------------------------------------------------------------+
Plan hash value: 4259027061                                                             |
                                                                                        |
----------------------------------------------------------------------------------------|
| Id  | Operation          | Name              | Rows  | Bytes | Cost (%CPU)| Time     ||
----------------------------------------------------------------------------------------|
|   0 | SELECT STATEMENT   |                   |     3 |    42 |     4   (0)| 00:00:01 ||
|   1 |  UNION-ALL         |                   |       |       |            |          ||
|*  2 |   INDEX RANGE SCAN | EX3_CUSTOM_PK_IDX |     2 |    28 |     1   (0)| 00:00:01 ||
|*  3 |   TABLE ACCESS FULL| EX3_CUSTOM        |     1 |    14 |     3   (0)| 00:00:01 ||
----------------------------------------------------------------------------------------|
                                                                                        |
Predicate Information (identified by operation id):                                     |
---------------------------------------------------                                     |
                                                                                        |
   2 - access("CUSTOMNAME"='이민빈')                                                       |
   3 - filter("CUSTOMTELNUMBER"='01057564495')                                          |
                                                                                        |
Note                                                                                    |
-----                                                                                   |
   - dynamic statistics used: dynamic sampling (level=2)                                |

이렇게 하면 union all 상단부분은 index를 탈 수 있게 만들 수 있습니다

이때 왜 union all 아랫부분은 인덱스 스캔이 안되는 이유는 당연하게도 인덱스를 만들 때 CUSTOMNAME으로 정렬해서 만들었기 때문에 선두 컬럼이 없으면 아무리 인덱스에 들어가 있다고 할지라도 단독으로 쓰일 경우 범위 스캔을 사용하지 않고 full scan 을 유도하게 됩니다 그렇기에 이때는 인덱스를 따로따로 만들어서 힌트를 각각 만들어주는 것이 좋습니다

인덱스를 각각 만들어서 힌트제공

ALTER TABLE Ex3_Custom DROP CONSTRAINT pk_ex3_custom;
DROP INDEX ex3_custom_pk_idx
CREATE INDEX ex3_custom_customName_idx ON Ex3_Custom (customName);
CREATE INDEX ex3_custom_customTelNumber_idx ON Ex3_Custom (customTelNumber);

다음과 같이 기존 PK를 제거하고 개별적인 인덱스를 만들어주게 됩니다 이때 각각의 값이 유일하지 않기 때문에 unique index 가 아닌 그냥 index를 사용합니다

EXPLAIN PLAN FOR
SELECT /*+ INDEX(Ex3_Custom ex3_custom_customName_idx) */
* FROM Ex3_Custom
WHERE customName = '이민빈' 
UNION ALL 
SELECT /*+ INDEX(Ex3_Custom ex3_custom_customTelNumber_idx) */
* FROM Ex3_Custom
WHERE customTelNumber = '01057564495' 



SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY);

PLAN_TABLE_OUTPUT                                                                                                      |
-----------------------------------------------------------------------------------------------------------------------+
Plan hash value: 2350922940                                                                                            |
                                                                                                                       |
-----------------------------------------------------------------------------------------------------------------------|
| Id  | Operation                            | Name                           | Rows  | Bytes | Cost (%CPU)| Time     ||
-----------------------------------------------------------------------------------------------------------------------|
|   0 | SELECT STATEMENT                     |                                |     3 |    42 |     5   (0)| 00:00:01 ||
|   1 |  UNION-ALL                           |                                |       |       |            |          ||
|   2 |   TABLE ACCESS BY INDEX ROWID BATCHED| EX3_CUSTOM                     |     2 |    28 |     3   (0)| 00:00:01 ||
|*  3 |    INDEX RANGE SCAN                  | EX3_CUSTOM_CUSTOMNAME_IDX      |     2 |       |     1   (0)| 00:00:01 ||
|   4 |   TABLE ACCESS BY INDEX ROWID BATCHED| EX3_CUSTOM                     |     1 |    14 |     2   (0)| 00:00:01 ||
|*  5 |    INDEX RANGE SCAN                  | EX3_CUSTOM_CUSTOMTELNUMBER_IDX |     1 |       |     1   (0)| 00:00:01 ||
-----------------------------------------------------------------------------------------------------------------------|
                                                                                                                       |
Predicate Information (identified by operation id):                                                                    |
---------------------------------------------------                                                                    |
                                                                                                                       |
   3 - access("CUSTOMNAME"='이민빈')                                                                                      |
   5 - access("CUSTOMTELNUMBER"='01057564495')                                                                         |
                                                                                                                       |
Note                                                                                                                   |
-----                                                                                                                  |
   - dynamic statistics used: dynamic sampling (level=2)                                                               |

옵티마이저에게 각각의 힌트 인덱스를 제공하면 다음과 같이 범위 스캔을 사용할 수 있게 됩니다

개발자의 딜레마

지금은 데이터가 적은 표본으로 했기 때문에 실행 계획상 오히려 아래 데이터가 더 cost 가 높게 나왔다 하지만 데이터가 많으면 많을수록 아래 힌트 쿼리가 속도가 매우 빠르게 된다 하지만 OR를 만날 때마다 지금처럼 union all로 변경할 수 없다 애플리케이션 유지 보수 비용 및 그때마다의 상황이 다르기 때문이다 이때는 DB 튜너의 도움을 받는 것을 추천합니다

SQL_튜닝_11

java 소스예제

간단한 예제 테이블 스키마

java 소스

OR 은 Index Range Scan 을 할 수 있을까?

OR 은 무조건 Index 범위 스캔이 안되냐

인덱스를 각각 만들어서 힌트제공

개발자의 딜레마

관련된 글

SQL_튜닝_28

SQL_튜닝_29

SQL_튜닝_30