前言:
还是最近遇到的慢SQL,本质上也是标量子查询,但稍微有一点绕,它是通过自定义函数来return返回值,这种情况如果主查询数据量越大,往函数中传参次数也就越多,相当于强制嵌套循环。
--构建环境
create table test10(clntid varchar2(10),mkey varchar2(10),grupid varchar2(10),srvcode varchar2(10),ekey varchar2(10),edate timestamp(0));
create table test11(clntid varchar2(10),srvcode varchar2(10),klass varchar2(10));
create index idx_test10_01 on test10(clntid,mkey,grupid,edate desc);
create index idx_test11_01 on test11(clntid,srvcode,klass);
create table test12(accountid number,acctype varchar2(10));
create table test13(clntid varchar2(5),mkey varchar2(10),accountid number,plan varchar2(1),edate timestamp(0),EKEY VARCHAR2(3));
--数据随便造点
--自定义函数
create or replace function getkey(cpclntid varchar2,cpmkey varchar2)
return varchar2 as
cursor curer is select ekey
from test10
where clntid = cpclntid
and mkey = cpmkey
and grupid = 'SV'
order by edate desc;
cursor curerv is select ms.ekey
from test10 ms,
test11 cs
where ms.clntid=cs.clntid
and ms.srvcode=cs.srvcode
and cs.klass='F'
and ms.grupid='SV'
and ms.clntid=cpclntid
and ms.mkey=cpmkey
order by ms.edate desc;
cekey test10.ekey%type;
cGetLastEr varchar2(1) := 'N';
begin
open curerv;
fetch curerv into cekey;
if curerv%notfound then
cGetLastEr := 'Y';
end if;
close curerv;
if cGetLastEr = 'Y' then
open curer;
fetch curer into cekey;
close curer;
end if;
return cekey;
end;
/
问题SQL如下:
select *
from (select CLNTID,
MKEY,
ekey,
count(*)
from (select b.CLNTID,
b.MKEY,
b.accountid,
getkey(b.clntid,b.mkey) as ekey
from test12 a,
test13 b
where b.accountid=a.accountid
and a.acctype='C')
group by CLNTID,MKEY,ekey
order by 1,2,3 desc )
where rownum <= 200;
--执行计划
1 #NSET2: [68, 1000->200, 252]
2 #PRJT2: [68, 1000->200, 252]; exp_num(4), is_atom(FALSE)
3 #PRJT2: [68, 1000->200, 252]; exp_num(4), is_atom(FALSE)
4 #SORT3: [68, 1000->200, 252]; key_num(3), is_distinct(FALSE), is_adaptive(0), MEM_USED(47104KB), DISK_USED(0KB)
5 #HAGR2: [58, 100000->70246, 252]; grp_num(3), sfun_num(1), MEM_USED(4395KB), DISK_USED(0KB), distinct_flag[0]; slave_empty(0) keys(V_OLD.CLNTID, V_OLD.MKEY, V_OLD.EKEY)
6 #PRJT2: [39, 201630->222230, 252]; exp_num(3), is_atom(FALSE)
7 #HASH2 INNER JOIN: [39, 201630->222230, 252]; KEY_NUM(1), MEM_USED(15022KB), DISK_USED(0KB) KEY(A.ACCOUNTID=B.ACCOUNTID) KEY_NULL_EQU(0)
8 #SLCT2: [13, 20161->20161, 78]; A.ACCTYPE = 'C'
9 #CSCN2: [13, 100000->100000, 78]; INDEX33633542(TEST12)
10 #CSCN2: [14, 100000->100000, 174]; INDEX33633546(TEST13)
Statistics
-----------------------------------------------------------------
0 data pages changed
0 undo pages changed
2538427 logical reads
0 physical reads
0 redo size
11315 bytes sent to client
376 bytes received from client
2 roundtrips to/from client
0 sorts (memory)
0 sorts (disk)
200 rows processed
0 io wait time(ms)
14419 exec time(ms)
--ET
PRJT2 3 0% 10 3 4 0 0 0 0 null null 0
PRJT2 3 0% 10 2 4 0 0 0 0 null null 0
DLCK 17 0% 9 0 3 0 0 0 0 null null 0
NSET2 242 0% 8 1 4 0 0 0 0 null null 0
SLCT2 2104 0.01% 7 8 670 0 0 0 0 null null 0
SORT3 2498 0.02% 6 4 241 47104 0 0 0 null null 0
CSCN2 8120 0.06% 5 9 335 0 0 0 0 null null 0
CSCN2 14254 0.10% 4 10 335 0 0 0 0 null null 0
HI3 68422 0.48% 3 7 1670 15022 0 8613 0 null null 17463
HAGR2 90289 0.63% 2 5 1239 4395 0 50642 19604 null null 0
PRJT2 14151077 98.70% 1 6 2000 0 0 0 0 null null 0
1、首先读懂SQL,可以分为两层结构,最内层的派生表通过getkey函数计算出ekey,函数中首先open cursor curerv,如果cursor curerv数据未找到,则open cursor curer,最终return一个ekey值,外层再以返回值做一个count(*)聚合分组;
2、通过执行计划、trace和et可以看到这条sql主要代价在第6步的PRJT,也就是自定义函数计算部分,整个sql执行14.4s,函数计算就要占到98.7%,整体2538427的逻辑读;
3、test12和test13关联后返回的b.clntid和b.mkey就要依次带入函数中进行计算,结果集越大,计算次数就会越多,效率就越差;
那么这种情况下,如何对SQL进行等价改写?
实际上,内层的函数也是通过执行SQL获得结果,只不过有一层对数据"是否未找到"的判断逻辑,以下为改写后的SQL
--改写后
select *
from ( select CLNTID,
MKEY,
ekey,
count(*)
from (select b.CLNTID,
b.MKEY,
b.plan,
b.accountid,
nvl(v2.ekey,v1.ekey) ekey
from test12 a,
test13 b
left join (select ekey,clntid,mkey,row_number() over(partition by clntid,mkey order by edate desc) rn
from test10
where grupid = 'SV') v1
on v1.clntid = b.clntid
and v1.mkey = b.mkey
and v1.rn=1
left join (select t1.ekey,t1.clntid,t1.mkey,row_number() over(partition by t1.clntid,t1.mkey order by t1.edate desc) rn
from test10 t1,
test11 t2
where t1.clntid=t2.clntid
and t1.srvcode=t2.srvcode
and t2.klass='F'
and t1.grupid='SV' ) v2
on v2.clntid=b.clntid
and v2.mkey=b.mkey
and v2.rn=1
where b.accountid=a.accountid
and a.acctype='C')
group by CLNTID,MKEY,ekey
order by 1,2,3 desc )
where rownum <= 200;
1、首先将函数中的两个SQL放到外层与test13 b表进行left join,但原来的逻辑是按照edate倒序排列获取ekey值,因此在改写时通过开窗函数,以clntid,mkey为组,取每个组里edate倒序排列后的第一个ekey,clntid,mkey,再将clntid和mkey与左表test13的clntid和mkey相关联;
2、由于left join后的结果集中右表未关联到的会置为空,所以可以用nvl(v2.ekey,v1.ekey)代替原来函数中if curerv%notfound的逻辑,获取到最终的ekey结果,再做最外层的group by;
--改写后执行计划
1 #NSET2: [30826118, 100000->200, 854]
2 #PRJT2: [30826118, 100000->200, 854]; exp_num(4), is_atom(FALSE)
3 #PRJT2: [30826118, 100000->200, 854]; exp_num(4), is_atom(FALSE)
4 #SORT3: [30826118, 100000->200, 854]; key_num(3), is_distinct(FALSE), is_adaptive(0), MEM_USED(37376KB), DISK_USED(0KB)
5 #HAGR2: [30486868, 10000000->70246, 854]; grp_num(3), sfun_num(1), MEM_USED(4395KB), DISK_USED(0KB), distinct_flag[0]; slave_empty(0) keys(DMTEMPVIEW_889208327.CLNTID, DMTEMPVIEW_889208327.MKEY, DMTEMPVIEW_889208327.EKEY)
6 #PRJT2: [655, 897738464->222230, 854]; exp_num(3), is_atom(FALSE)
7 #HASH RIGHT JOIN2: [655, 897738464->222230, 854]; key_num(2); col_num(4); MEM_USED(24750KB), DISK_USED(0KB) KEY(V2.CLNTID=B.CLNTID AND V2.MKEY=B.MKEY)
8 #SLCT2: [354, 230812->22483, 397]; V2.RN = var2
9 #PRJT2: [46, 9232500->27508, 397]; exp_num(4), is_atom(FALSE)
10 #AFUN: [46, 9232500->27508, 397]; afun_num(1)
11 #SORT3: [46, 9232500->27508, 397]; key_num(3), is_distinct(FALSE), is_adaptive(0), MEM_USED(37347KB), DISK_USED(0KB)
12 #HASH2 INNER JOIN: [46, 9232500->27508, 397]; KEY_NUM(2), MEM_USED(24238KB), DISK_USED(0KB) KEY(T2.CLNTID=T1.CLNTID AND T2.SRVCODE=T1.SRVCODE) KEY_NULL_EQU(0, 0)
13 #SLCT2: [14, 33237->33237, 144]; T2.KLASS = 'F'
14 #SSCN: [14, 100000->100000, 144]; IDX_TEST11_01(TEST11)
15 #SLCT2: [16, 100000->100000, 253]; T1.GRUPID = 'SV'
16 #CSCN2: [16, 100000->100000, 253]; INDEX33633536(TEST10)
17 #HASH RIGHT JOIN2: [79, 1400209->222230, 457]; key_num(2); col_num(3); MEM_USED(41134KB), DISK_USED(0KB) KEY(V1.CLNTID=B.CLNTID AND V1.MKEY=B.MKEY)
18 #SLCT2: [19, 2500->76880, 205]; V1.RN = var3
19 #PRJT2: [15, 100000->100000, 205]; exp_num(4), is_atom(FALSE)
20 #AFUN: [15, 100000->100000, 205]; afun_num(1)
21 #SORT3: [15, 100000->100000, 205]; key_num(3), is_distinct(FALSE), is_adaptive(0), MEM_USED(38909KB), DISK_USED(0KB)
22 #SLCT2: [15, 100000->100000, 205]; TEST10.GRUPID = 'SV'
23 #CSCN2: [15, 100000->100000, 205]; INDEX33633536(TEST10)
24 #HASH2 INNER JOIN: [39, 201630->222230, 252]; KEY_NUM(1), MEM_USED(15022KB), DISK_USED(0KB) KEY(A.ACCOUNTID=B.ACCOUNTID) KEY_NULL_EQU(0)
25 #SLCT2: [13, 20161->20161, 78]; A.ACCTYPE = 'C'
26 #CSCN2: [13, 100000->100000, 78]; INDEX33633542(TEST12)
27 #CSCN2: [14, 100000->100000, 174]; INDEX33633546(TEST13)
Statistics
-----------------------------------------------------------------
0 data pages changed
0 undo pages changed
728 logical reads
0 physical reads
0 redo size
11315 bytes sent to client
1594 bytes received from client
2 roundtrips to/from client
2 sorts (memory)
0 sorts (disk)
200 rows processed
0 io wait time(ms)
519 exec time(ms)
从执行计划也可以看出改写后是test10和test11先关联后,V1和V2再与test13做hash join,逻辑读从2538427降至728,执行时间也从14s降至519ms。
1、SQL中投影部分的自定义函数本质上也是标量子查询的一种,因此数据量大的情况也可以通过left join改写;
2、这种情况只能用嵌套循环来计算结果无法hash关联,因此在驱动表数据量大时还是尽量避免自定义函数处理;
文章
阅读量
获赞