最近在项目上碰到一些sql脚本由于错误使用标量子查询引起性能问题,接下来通过构建简单的例子进行分析。
drop table if exists test1;
drop table if exists test2;
create table test1 (id int primary key,c1 int,c2 varchar(10),c3 TIMESTAMP(0),c4 int);
create table test2 (id int primary key,c1 int,c2 varchar(10),c3 TIMESTAMP(0),c4 int);
insert into test1 select level,dbms_random.value(1,100000),dbms_random.string('U',10),SYSDATE - TRUNC(DBMS_RANDOM.VALUE(-180,180)),dbms_random.value(1,6) from dual connect by level<=1000000;
commit;
insert into test2 select level,dbms_random.value(1,100000),dbms_random.string('U',10),SYSDATE - TRUNC(DBMS_RANDOM.VALUE(-180,180)),dbms_random.value(1,6) from dual connect by level<=10000000;
commit;
DBMS_STATS.GATHER_TABLE_STATS(USER,'TEST1',null,100);
DBMS_STATS.GATHER_TABLE_STATS(USER,'TEST2',null,100);
select
sum(nvl((select c1 from test2 t where t.id=t2.id and t.c4=1),0)) as sum_c4_1,
sum(nvl((select c1 from test2 t where t.id=t2.id and t.c4=2),0)) as sum_c4_2,
sum(nvl((select c1 from test2 t where t.id=t2.id and t.c4=3),0)) as sum_c4_3
from
test1 t1,
test2 t2
where
t1.id =t2.id
and t1.c4 =1
and to_number(to_char(t1.c3, 'YYYY'))=2022
autotrace&et:
LINEID SUM_C4_1 SUM_C4_2 SUM_C4_3
---------- -------------------- -------------------- --------------------
1 260664363 507449993 512535726
1 #NSET2: [9507, 1->1, 32]
2 #PIPE2: [9507, 1->1, 32]
3 #PIPE2: [6483, 1->1, 32]
4 #PIPE2: [3329, 1->1, 32]
5 #PRJT2: [174, 1->1, 32]; exp_num(3), is_atom(FALSE)
6 #AAGR2: [174, 1->1, 32]; grp_num(0), sfun_num(3)
7 #PRJT2: [174, 2506->51683, 32]; exp_num(3), is_atom(FALSE)
8 #HASH2 INNER JOIN: [174, 2506->50687, 32]; LRKEY_UNIQUE KEY_NUM(1);
9 #NEST LOOP INDEX JOIN2: [174, 2506->996, 32]
10 #ACTRL: [174, 2506->51683, 32];
11 #SLCT2: [118, 2506->51683, 20];
12 #CSCN2: [118, 1000000->1000000, 20]; INDEX33555471(TEST1)
13 #SSEK2: [11, 1->996, 4]; scan_type(ASC), INDEX33555474(TEST2)
14 #SSCN: [1073, 10000000->10000000, 12]; INDEX33555474(TEST2)
15 #SPL2: [3154, 1999358, 24]; key_num(2), spool_num(2), has_variable(0)
16 #PRJT2: [3154, 1999358->1999358, 24]; exp_num(2), is_atom(FALSE)
17 #HASH2 INNER JOIN: [3154, 1999358->1999358, 24]; LRKEY_UNIQUE KEY_NUM(1);
18 #SLCT2: [1169, 1999358->1999358, 12];
19 #CSCN2: [1169, 10000000->10000000, 12]; INDEX33555473(TEST2)
20 #SSCN: [1073, 10000000->10000000, 12]; INDEX33555474(TEST2)
21 #SPL2: [3154, 1999235, 24]; key_num(2), spool_num(1), has_variable(0)
22 #PRJT2: [3154, 1999235->1999235, 24]; exp_num(2), is_atom(FALSE)
23 #HASH2 INNER JOIN: [3154, 1999235->1999235, 24]; LRKEY_UNIQUE KEY_NUM(1);
24 #SLCT2: [1169, 1999235->1999235, 12];
25 #CSCN2: [1169, 10000000->10000000, 12]; INDEX33555473(TEST2)
26 #SSCN: [1073, 10000000->10000000, 12]; INDEX33555474(TEST2)
27 #SPL2: [3024, 999867, 24]; key_num(2), spool_num(0), has_variable(0)
28 #PRJT2: [3024, 999867->999867, 24]; exp_num(2), is_atom(FALSE)
29 #HASH2 INNER JOIN: [3024, 999867->999867, 24]; LRKEY_UNIQUE KEY_NUM(1);
30 #SLCT2: [1169, 999867->999867, 12];
31 #CSCN2: [1169, 10000000->10000000, 12]; INDEX33555473(TEST2)
32 #SSCN: [1073, 10000000->10000000, 12]; INDEX33555474(TEST2)
used time: 00:00:07.029. Execute id is 1004.
SQL> et(1004);
LINEID OP TIME(US) PERCENT RANK SEQ N_ENTER HASH_USED_CELLS HASH_CONFLICT
---------- ----- -------------------- ------- -------------------- ----------- ----------- -------------------- --------------------
1 SPL2 0 0% 17 21 1 0 0
2 SPL2 0 0% 17 15 1 0 0
3 PIPE2 2 0% 16 3 5 0 0
4 SPL2 3 0% 15 27 1 0 0
5 PIPE2 4 0% 14 2 5 0 0
6 PRJT2 11 0% 13 5 4 0 0
7 DLCK 17 0% 12 0 2 0 0
8 ACTRL 2162 0.03% 11 10 6671 0 0
9 NSET2 3764 0.05% 10 1 3 0 0
10 IJI2 3769 0.05% 9 9 9594 0 0
11 PIPE2 3948 0.06% 8 4 5 0 0
LINEID OP TIME(US) PERCENT RANK SEQ N_ENTER HASH_USED_CELLS HASH_CONFLICT
---------- ----- -------------------- ------- -------------------- ----------- ----------- -------------------- --------------------
12 AAGR2 25475 0.37% 7 6 4268 0 0
13 CSCN2 38735 0.56% 6 12 3335 0 0
14 SSEK2 60685 0.88% 5 13 1992 0 0
15 HI3 130433 1.89% 4 8 41868 49566 1121
16 SSCN 134061 1.94% 3 14 33335 0 0
17 SLCT2 170205 2.46% 2 11 6670 0 0
18 PRJT2 6344202 91.71% 1 7 8532 0 0
18 rows got
used time: 114.489(ms). Execute id is 1005.
从et中可以看到耗时在PRJT2,是查询项引起的耗时,而语句中的查询项涉及到标量子查询,标量子查询中的表test2又出现在主查询中,显然可以减少test2的查询次数,将不同于主查询的条件用case when放到查询项中,从而消除标量子查询。
select sum(nvl(case when t2.c4=1 then t2.c1 end,0)) as sum_c4_1,
sum(nvl(case when t2.c4=2 then t2.c1 end,0)) as sum_c4_2,
sum(nvl(case when t2.c4=3 then t2.c1 end,0)) as sum_c4_3
from test1 t1,
test2 t2
where
t1.id =t2.id
and t1.c4 =1
and to_number(to_char(t1.c3, 'YYYY'))=2022
autotrace
LINEID SUM_C4_1 SUM_C4_2 SUM_C4_3
---------- -------------------- -------------------- --------------------
1 260664363 507449993 512535726
1 #NSET2: [200, 1->1, 32]
2 #PRJT2: [200, 1->1, 32]; exp_num(3), is_atom(FALSE)
3 #AAGR2: [200, 1->1, 32]; grp_num(0), sfun_num(3)
4 #PRJT2: [200, 2506->51683, 32]; exp_num(3), is_atom(FALSE)
5 #HASH2 INNER JOIN: [200, 2506->50687, 32]; LRKEY_UNIQUE KEY_NUM(1);
6 #NEST LOOP INDEX JOIN2: [200, 2506->996, 32]
7 #ACTRL: [200, 2506->51683, 32];
8 #SLCT2: [118, 2506->51683, 20];
9 #CSCN2: [118, 1000000->1000000, 20]; INDEX33555471(TEST1)
10 #BLKUP2: [16, 1->996, 4]; INDEX33555474(TEST2)
11 #SSEK2: [16, 1->996, 4]; scan_type(ASC), INDEX33555474(TEST2)
12 #CSCN2: [1073, 10000000->10000000, 12]; INDEX33555473(TEST2)
used time: 454.043(ms). Execute id is 1006.
像这种情况,标量子查询中的表出现在主查询中,可以通过case when把不同于主查询的条件放到查询项中进行查询从而消除标量子查询的代价,大大提升性能。
文章
阅读量
获赞