在项目中,接到这样的一个业务需求,找出有5个证书这样含金量较高的员工信息。
drop table if exists test;
CREATE TABLE TEST(id int primary key,certification varchar2(20),empid varchar2(20),bcode varchar2(20))
partition by list(BCODE)(
partition P_0301 values('0301'),
partition P_0302 values('0302'),
partition P_0303 values('0303'),
partition P_0304 values('0304'),
partition P_0305 values('0305'),
partition P_0306 values('0306'),
partition P_0307 values('0307'),
partition P_0308 values('0308'),
partition P_0309 values('0309'),
partition P_DEFALUE values(default)
)
;
insert into test select level,'C'||to_char(round(dbms_random.value(1,5),0)),'A'||to_char(round(dbms_random.value(1,10000),0)),
'0'||to_char(round(dbms_random.value(301,301),0)) from dual connect by level<=100000;
commit;
insert into test select level+100000,'C'||to_char(round(dbms_random.value(6,30),0)),'A'||to_char(round(dbms_random.value(1,10000),0)),
'0'||to_char(round(dbms_random.value(301,309),0)) from dual connect by level<=400000;
commit;
drop table if exists test1;
create table test1(empid varchar2(20),ename varchar2(20),etime timestamp);
insert into test1 select 'A'||to_char(round(dbms_random.value(1,200000),0)),'ABC'||to_char(round(dbms_random.value(1,10000),0)),
SYSDATE-round(dbms_random.value(1,720),0) from dual connect by level<=200000;
commit;
dbms_stats.gather_table_stats(USER,'TEST',null,100);
dbms_stats.gather_table_stats(USER,'TEST1',null,100);
create or replace index IDX_DM_TEST_EMPID_BCODE on TEST(certification,empid)global;
TEST表记录员工获取证书的情况;TEST1表记录员工数据,现在需要筛选0301部门同时获取到C1、C2、C3、C4、C5这五个证书的员工。
一般做法:
with temp as (
select * from test
where bcode='0301'
)
select t1.* from test1 t1 where t1.empid in (select empid from temp where certification ='C1')
and t1.empid in (select empid from temp where certification ='C2')
and t1.empid in (select empid from temp where certification ='C3')
and t1.empid in (select empid from temp where certification ='C4')
and t1.empid in (select empid from temp where certification ='C5');
计划:
1 #NSET2: [130, 10000, 121]
2 #PRJT2: [130, 10000, 121]; exp_num(4), is_atom(FALSE)
3 #HASH LEFT SEMI JOIN2: [130, 10000, 121]; KEY_NUM(1); KEY(T1.EMPID=TEST.EMPID) KEY_NULL_EQU(0)
4 #HASH LEFT SEMI JOIN2: [111, 10000, 121]; KEY_NUM(1); KEY(T1.EMPID=TEST.EMPID) KEY_NULL_EQU(0)
5 #HASH LEFT SEMI JOIN2: [94, 10000, 121]; KEY_NUM(1); KEY(T1.EMPID=TEST.EMPID) KEY_NULL_EQU(0)
6 #HASH LEFT SEMI JOIN2: [77, 10000, 121]; KEY_NUM(1); KEY(T1.EMPID=TEST.EMPID) KEY_NULL_EQU(0)
7 #HASH RIGHT SEMI JOIN2: [59, 15853, 121]; n_keys(1) KEY(TEST.EMPID=T1.EMPID) KEY_NULL_EQU(0)
8 #PARALLEL: [14, 12431, 148]; scan_type(EQU), key_num(1, 0, 0), simple(1)
9 #SLCT2: [14, 12431, 148]; TEST.BCODE = '0301'
10 #BLKUP2: [14, 12431, 148]; IDX_DM_TEST_EMPID_BCODE(TEST)
11 #SSEK2: [14, 12431, 148]; scan_type(ASC), IDX_DM_TEST_EMPID_BCODE(TEST), scan_range[('C1',min),('C1',max)), is_global(1)
12 #CSCN2: [25, 200000, 121]; INDEX33555759(TEST1 as T1); btr_scan(1)
13 #PARALLEL: [4, 125041, 148]; scan_type(EQU), key_num(1, 0, 0), simple(1)
14 #SLCT2: [18, 125041, 148]; (TEST.BCODE = '0301' AND TEST.CERTIFICATION = 'C4')
15 #CSCN2: [18, 500000, 148]; INDEX33555761(TEST); btr_scan(1)
16 #PARALLEL: [4, 125041, 148]; scan_type(EQU), key_num(1, 0, 0), simple(1)
17 #SLCT2: [18, 125041, 148]; (TEST.BCODE = '0301' AND TEST.CERTIFICATION = 'C3')
18 #CSCN2: [18, 500000, 148]; INDEX33555761(TEST); btr_scan(1)
19 #PARALLEL: [4, 125041, 148]; scan_type(EQU), key_num(1, 0, 0), simple(1)
20 #SLCT2: [18, 125041, 148]; (TEST.BCODE = '0301' AND TEST.CERTIFICATION = 'C2')
21 #CSCN2: [18, 500000, 148]; INDEX33555761(TEST); btr_scan(1)
22 #PARALLEL: [14, 12479, 148]; scan_type(EQU), key_num(1, 0, 0), simple(1)
23 #SLCT2: [14, 12479, 148]; TEST.BCODE = '0301'
24 #BLKUP2: [14, 12479, 148]; IDX_DM_TEST_EMPID_BCODE(TEST)
25 #SSEK2: [14, 12479, 148]; scan_type(ASC), IDX_DM_TEST_EMPID_BCODE(TEST), scan_range[('C5',min),('C5',max)), is_global(1)
此时语句关联表的次数随着要找的证书记录增加而增加。
那么如何高效整理出这一份数据呢?
首先获取到0301部门且是C1、C2、C3、C4、C5的证书记录,并且按员工、证书分组记录,再按员工分组记录他获取证书情况,然后再求获取5个证书的员工,再和员工表进行关联。
改写如下:
select t1.* from test1 t1,(select * from (
select count(1) cnt,empid from (
select empid,certification from test where certification in ('C1','C2','C3','C4','C5') and bcode='0301'
group by empid,certification) tt group by empid) YY where cnt=5) t2 where t1.empid=t2.empid
;
计划:
1 #NSET2: [59, 275, 253]
2 #PRJT2: [59, 275, 253]; exp_num(3), is_atom(FALSE)
3 #HASH2 INNER JOIN: [59, 275, 253]; KEY_NUM(1); KEY(YY.EMPID=T1.EMPID) KEY_NULL_EQU(0)
4 #SLCT2: [17, 173, 144]; YY.CNT = var2
5 #PRJT2: [17, 6946, 144]; exp_num(2), is_atom(FALSE)
6 #HAGR2: [17, 6946, 144]; grp_num(1), sfun_num(1), distinct_flag[0]; slave_empty(0) keys(TT.EMPID)
7 #PRJT2: [16, 6946, 144]; exp_num(1), is_atom(FALSE)
8 #HAGR2: [16, 6946, 144]; grp_num(2), sfun_num(0); slave_empty(0) keys(TEST.EMPID, TEST.CERTIFICATION)
9 #HASH RIGHT SEMI JOIN2: [15, 6946, 144]; n_keys(1) KEY(DMTEMPVIEW_889198355.colname=TEST.CERTIFICATION) KEY_NULL_EQU(0)
10 #CONST VALUE LIST: [1, 5, 48]; row_num(5), col_num(1)
11 #PARALLEL: [1, 125041, 144]; scan_type(EQU), key_num(1, 0, 0), simple(1)
12 #SLCT2: [18, 500000, 144]; TEST.BCODE = '0301'
13 #CSCN2: [18, 500000, 144]; INDEX33555761(TEST); btr_scan(1)
14 #CSCN2: [25, 200000, 109]; INDEX33555759(TEST1 as T1); btr_scan(1)
这样的方式性能有几倍的提升。
优化思路:
1、 先过滤数据,获取满足0301部门和证书C1、C2、C3、C4、C5的集合
2、 再分组,先按员工和证书分组,再按员工分组;
3、 获取满足的数据集合,获取有5个证书的员工信息
4、 最后关联,关联员工表获取数据。
总体来讲,先过滤筛选数据,先分组后关联,当整理成一个满足需求再关联另一张表,这样的方式会更加高效。
文章
阅读量
获赞