💡 Lecture/데이터분석가 회사에서 많이 쓰는 완전 기초 쿼리

[SQL] 데이터분석가 많이 쓰는 완전 기초 쿼리 4탄 : row number, (dense) rank, ntile

미미누 2023. 11. 10. 21:17

 

https://www.youtube.com/watch?v=TkLIlVCFaLE&t=4s

본 글은 [SQL] 데이터분석가 회사에서 많이 쓰는 완전 기초 쿼리, 을 시청하고 정리한 글입니다.


분할하는 쿼리

PARTITION BY

 

PARTITION BY 고객명 ORDER BY 방문일자

→ 고객명으로 분할하고, 방문일자로 정렬하기

 

PARTITION BY 고객명, 구매항목

→ 고객명으로 우선 분할하고, 구매항목으로 분할하기

 

PARTITION BY 고객명, 구매항목 ORDER BY 방문일자 DESC

 

ROW_NUMBER()

→ 순서가 동차라면 랜덤으로 1번, 2번을 정함

 

ROW_NUMBER()의 예시

ROW_NUMBER () OVER (PARTITION BY 고객명 ORDER BY 방문일자) AS 순서

RANK () OVER (ORDER BY 등급)

→ 동점은 같은 순위로 하고, 사람 수만큼 건너뜀

 

DENSE_RANK () OVER (ORDER BY 등급)

→ 건너뛰는 등급 없이 순서대로 등급을 구분함

 

NTILE(3)OVER (ORDER BY 등급)

  • 3개의 그룹으로 나누어 타일로 구분하는 쿼리

 

CREATE TABLE DASQL.PRACTICE_PARTITION_BY
(
    CUST_NAME VARCHAR(10) NOT NULL
    , VST_DT DATE
    , BUY_NM VARCHAR(30)
)

INSERT INTO dasql.PRACTICE_PARTITION_BY VALUES ('래빗', STR_TO_DATE('2022.07.04', '%Y.%m.%d'), '과자');
INSERT INTO dasql.PRACTICE_PARTITION_BY VALUES ('래빗', STR_TO_DATE('2022.07.08', '%Y.%m.%d'), '야채');
INSERT INTO dasql.PRACTICE_PARTITION_BY VALUES ('래빗', STR_TO_DATE('2022.07.11', '%Y.%m.%d'), '음료');
INSERT INTO dasql.PRACTICE_PARTITION_BY VALUES ('래빗', STR_TO_DATE('2022.07.15', '%Y.%m.%d'), '과자');
INSERT INTO dasql.PRACTICE_PARTITION_BY VALUES ('래빗', STR_TO_DATE('2022.07.23', '%Y.%m.%d'), '야채');
INSERT INTO dasql.PRACTICE_PARTITION_BY VALUES ('버드', STR_TO_DATE('2022.07.05', '%Y.%m.%d'), '야채');
INSERT INTO dasql.PRACTICE_PARTITION_BY VALUES ('버드', STR_TO_DATE('2022.07.21', '%Y.%m.%d'), '음료');
INSERT INTO dasql.PRACTICE_PARTITION_BY VALUES ('타이거', STR_TO_DATE('2022.07.02', '%Y.%m.%d'), '생선');
INSERT INTO dasql.PRACTICE_PARTITION_BY VALUES ('타이거', STR_TO_DATE('2022.07.16', '%Y.%m.%d'), '고기');
INSERT INTO dasql.PRACTICE_PARTITION_BY VALUES ('타이거', STR_TO_DATE('2022.07.31', '%Y.%m.%d'), '고기');

SELECT * FROM dasql.PRACTICE_PARTITION_BY;

INSERT INTO dasql.PRACTICE_PARTITION_BY (CUST_NAME, VST_DT, BUY_NM)
VALUES
('래빗', STR_TO_DATE('2022.07.04', '%Y.%m.%d'), '과자')


INSERT INTO dasql.PRACTICE_PARTITION_BY VALUES ('래빗', NULL, '야채')

SELECT CUST_NAME AS 고객명
    , DATE_FORMAT(VST_DT, '%Y-%m-%d') AS 방문일
    , BUY_NM AS 구매항목
    , ROW_NUMBER() over (PARTITION BY CUST_NAME ORDER BY VST_DT) AS 행번호1
    , ROW_NUMBER() over (PARTITION BY CUST_NAME ORDER BY VST_DT DESC) AS 행번호2
    , ROW_NUMBER() over (PARTITION BY CUST_NAME, BUY_NM ORDER BY VST_DT) AS 행번호3
    , ROW_NUMBER() over (PARTITION BY CUST_NAME, BUY_NM ORDER BY VST_DT DESC) AS 행번호4
    , ROW_NUMBER() over (PARTITION BY CUST_NAME, BUY_NM ORDER BY VST_DT is null asc, VST_DT desc) AS 행번호5
FROM dasql.PRACTICE_PARTITION_BY ppb;

CREATE TABLE dasql.PRACTICE_RANK
(
    ST_NAME VARCHAR(10) NOT NULL
    , GRADE VARCHAR(10)
)

INSERT INTO dasql.PRACTICE_RANK (ST_NAME, GRADE)
VALUES
    ('캣', 'A'),
    ('판다', 'A'),
    ('버드', 'B'),
    ('몽키', 'B'),
    ('타이거', 'B'),
    ('캥거루', 'C'),
    ('폭스', 'D'),
    ('래빗', 'F')

SELECT * FROM dasql.PRACTICE_RANK

SELECT ST_NAME AS 학생
    , GRADE AS 등급
    , RANK()OVER(ORDER BY GRADE) AS 순위1
    , RANK()OVER(ORDER BY GRADE DESC) AS 순위2
    , DENSE_RANK() over (ORDER BY GRADE) AS 순위3
    , DENSE_RANK() over (ORDER BY GRADE DESC) AS 순위4
    , NTILE(3) over (ORDER BY GRADE) AS 타일1
    , NTILE(3) over (ORDER BY GRADE DESC ) AS 타일2
FROM dasql.PRACTICE_RANK

SELECT GRADE
    , COUNT(*) AS 개수
    , MIN()
FROM dasql.PRACTICE_RANK
WHERE GRADE LIKE '%B%'
GROUP BY GRADE;