SELECT ista_chil_id,
ista_code,
ista_bact_code,
level,
TRIM(REGEXP_SUBSTR(t.ista_bact_code, '[^,]+', 1, LEVEL)) AS ista_bact_code
FROM istabu t
CONNECT BY PRIOR t.ista_chil_id = t.ista_chil_id
AND PRIOR SYS_GUID() IS NOT NULL
AND LEVEL <= LENGTH(REGEXP_REPLACE(t.ista_bact_code, '[^,]+', '')) + 1
AND REGEXP_SUBSTR(t.ista_bact_code, '[^,]+', 1, LEVEL) IS NOT NULL
ORDER BY ista_chil_id,t.ista_bact_code, LEVEL;
1 #NSET2: [7, 43, 144]
2 #PRJT2: [7, 43, 144]; exp_num(5), is_atom(FALSE)
3 #SORT3: [7, 43, 144]; key_num(3), partition_key_num(0), is_distinct(FALSE), top_flag(0), is_adaptive(0)
4 #PRJT2: [6, 43, 144]; exp_num(4), is_atom(FALSE)
5 #HIERARCHICAL QUERY: [6, 43, 144]; KEY_NUM(0);
6 #PRJT2: [1, 43, 144]; exp_num(4), is_atom(FALSE)
7 #CSCN2: [1, 43, 144]; INDEX33555676(ISTABU as T); btr_scan(1)
8 #PRJT2: [1, 1, 144]; exp_num(4), is_atom(FALSE)
9 #SLCT2: [1, 1, 144]; (NOT(var2 IS NULL) AND T.ISTA_CHIL_ID = var1 AND NOT(exp11 IS NULL) AND exp11+1 >= var3)
10 #CSCN2: [1, 43, 144]; INDEX33555676(ISTABU as T); btr_scan(1)
SQL本身的逻辑是希望将ista_bact_code字段每一个字符串中以逗号分隔的值进行行转列,但在实际环境中跑了超过一个小时没有执行出结果,层次关联条件 CONNECT BY PRIOR t.ista_chil_id = t.ista_chil_id,表示上一行的 ista_chil_id 等于当前行的 ista_chil_id,在通常情况下并不会陷入“死循环”,因为 LEVEL 条件限制层级不超过 ista_bact_code 中逗号分隔的元素个数,REGEXP_SUBSTR(…) IS NOT NULL 则表示当取不到下一个元素时递归停止,同时 PRIOR SYS_GUID() IS NOT NULL 这个 “黑客” 写法,只是为了绕过 Oracle 的 ORA-01436: CONNECT BY loop in user data 错误,本身不会导致循环,但它是一种很不规范,且存在严重逻辑隐患的写法,如果 ista_chil_id 有重复值,或者表中有多行数据,会导致不同行之间错误的互联,产生笛卡尔积,将会导致结果集行数呈指数级增长。
with v1 (ista_chil_id,ista_code,ista_bact_code,lvl,c1)
as (
SELECT ista_chil_id,
ista_code,
ista_bact_code,
1 as lvl,
TRIM(REGEXP_SUBSTR(t.ista_bact_code, '[^,]+', 1, 1)) AS c1
FROM istabu t
where REGEXP_SUBSTR(t.ista_bact_code, '[^,]+', 1, 1) IS NOT NULL
union ALL
SELECT ista_chil_id,
ista_code,
ista_bact_code,
lvl + 1 as lvl,
TRIM(REGEXP_SUBSTR(v1.ista_bact_code, '[^,]+', 1, lvl + 1)) AS c1
FROM v1
where REGEXP_SUBSTR(ista_bact_code, '[^,]+', 1, lvl + 1) IS NOT NULL
)
select ista_chil_id,ista_code,ista_bact_code,lvl,c1
from v1
order by ista_chil_id,ista_bact_code,lvl;
1 #NSET2: [11, 84->197, 144]
2 #PRJT2: [11, 84->197, 144]; exp_num(5), is_atom(FALSE)
3 #SORT3: [11, 84->197, 144]; key_num(3), partition_key_num(0), is_distinct(FALSE), is_adaptive(0), MEM_USED(2048KB), DISK_USED(0KB)
4 #PRJT2: [10, 84->197, 144]; exp_num(5), is_atom(FALSE)
5 #UNION ALL: [10, 84->197, 144]; CTE(V1)
6 #PRJT2: [1, 34->35, 144]; exp_num(5), is_atom(FALSE)
7 #SLCT2: [1, 34->35, 144]; (NOT(T.ISTA_BACT_CODE IS NULL) AND NOT(exp11 IS NULL))
8 #CSCN2: [1, 43->35, 144]; INDEX33555676(ISTABU); btr_scan(1)
9 #PRJT2: [10, 50->162, 196]; exp_num(5), is_atom(FALSE)
10 #SLCT2: [10, 50->162, 196]; NOT(exp11 IS NULL)
11 #CTE_SCN: [10, 1000->197, 196]; exp_num(4), (V1)
Statistics
-----------------------------------------------------------------
0 data pages changed
0 undo pages changed
1 logical reads
0 physical reads
0 redo size
21950 bytes sent to client
64 bytes received from client
2 roundtrips to/from client
1 sorts (memory)
0 sorts (disk)
0 rows processed
0 io wait time(ms)
17 exec time(ms)
改写后彻底杜绝了结果行数爆炸的风险,递归 CTE (WITH … UNION ALL …) 是逐行独立处理的,上半部分 (UNION ALL 之前) 负责取第一个元素 (lvl=1),下半部分 (UNION ALL 之后) 递归成员负责基于上一次的结果 (lvl) 取下一个元素 (lvl+1),每一行数据的拆分只跟自己的行数据有关,不会跟其他行产生任何关联,彻底避免了笛卡尔积,SQL代码的可读性更加简洁,性能也得到大幅提升。
文章
阅读量
获赞
