注册
开启页校验&镜像文件&镜像文件修复测试
培训园地/ 文章详情 /

开启页校验&镜像文件&镜像文件修复测试

YL 2023/09/18 1364 1 0

1 使用要求

1.1 初始化实例

./dminit PATH=/opt/dmdbms/data PAGE_SIZE==32 EXTENT_SIZE=32 LOG_SIZE=2048 CHARSET=0 CASE_SENSITIVE=1 PAGE_CHECK=1 SYSTEM_MIRROR_PATH=/opt/dmdbms/data/DBF_MIRROR/SYSTEM_MIRROR.DBF MAIN_MIRROR_PATH=/opt/dmdbms/data/DBF_MIRROR/MAIN_MIRROR.DBF ROLL_MIRROR_PATH=/opt/dmdbms/data/DBF_MIRROR/ROLL_MIRROR.DBF

1.2 创建表空间

create tablespace "TEST" datafile 'TEST.DBF' MIRROR '/opt/dmdbms/data/DBF_MIRROR/TEST_MIRROR.DBF' size 128 autoextend on next 2048;

2 镜像文件修复作用测试

2.1 测试结论

1、数据文件损坏,镜像文件未损坏,数据文件会被镜像文件修复。
2、数据文件未损坏,镜像文件损坏,数据文件不会有异常。
3、数据文件损坏,镜像文件损坏,数据库会故障core。
数据文件会被镜像文件修复的情况下,似乎只有修复记录,没看到损坏情况的说明。

2.2 构造数据

  • 创建实例
./dminit PATH=temp PAGE_SIZE=32 EXTENT_SIZE=32 CHARSET=1 PAGE_CHECK=1

需要开启page_check,否则无法配置镜像文件

  • 创建表空间
create tablespace "MAIL_F33" datafile 'MAIL.DBF' MIRROR '/home/db/dmdba/dmdbms/bin/temp/MIR4.dbf' size 256;
  • 创建用户
create user "MAIL_F33" identified by "123456789"
default tablespace "MAIL_F33"
default index tablespace "MAIL_F33";
grant "DBA","PUBLIC" to "MAIL_F33";
  • 构造数据
CREATE TABLE "MAIL_F33"."T_DATA_METRIC_202207"(
"C_INST_ID" INT NOT NULL,
"C_SUB_INST_ID" INT NOT NULL,
"C_TASK_TIME" INT NOT NULL,
"C_INSERT_TIME" TIMESTAMP(6) DEFAULT CURRENT_TIMESTAMP NOT NULL,
"C_METRIC_ID" VARCHAR(36) NOT NULL,
"C_DATA_VALUE" DECIMAL(20,2) DEFAULT NULL,
"C_METRIC_STATUS" VARCHAR(20) DEFAULT NULL,
"C_ERROR_MSG" VARCHAR(50) DEFAULT NULL,
"C_DATA_VALUE_ADJUST" DECIMAL(20,2) DEFAULT NULL,
"C_DATA_VALUE_EXPECT" DECIMAL(20,2) DEFAULT NULL);
declare
begin
  for i in 1..4000000 loop
     INSERT INTO MAIL_F33.t_data_metric_202207
       (C_INST_ID, C_SUB_INST_ID, C_TASK_TIME, C_INSERT_TIME, C_METRIC_ID, C_DATA_VALUE, C_METRIC_STATUS, C_ERROR_MSG, C_DATA_VALUE_ADJUST, C_DATA_VALUE_EXPECT)
        VALUES(1000+i, 1000+i,1584323799+i , CURRENT_TIMESTAMP, '测试001', 9999.99, 'GREEN', 'test_err', 0, 1);
     if mod(i,2000)=0 then
      commit;
     end if;
  end loop;
end;
  • 对比镜像文件和数据文件
    先select checkpoint(100);否则受purge进度影响,对比不准
[root@FT10497 DAMENG]# md5sum MAIL.DBF 
f14f5e13d534e51566ae331d9f0c272e  MAIL.DBF
[root@FT10497 temp]# md5sum MIR4.dbf 
f14f5e13d534e51566ae331d9f0c272e  MIR4.dbf

2.3 验证过程

2.3.1 构造数据文件损坏的场景

  • 随机向数据文件里写了几个0
dd if=/dev/zero of=MAIL.DBF bs=256 count=1 seek=1024 conv=notrunc
dd if=/dev/zero of=MAIL.DBF bs=256 count=1 seek=102400 conv=notrunc
dd if=/dev/zero of=MAIL.DBF bs=256 count=1 seek=1024000 conv=notrunc
  • 检查md5
[root@FT10497 DAMENG]# md5sum MAIL.DBF 
6692f5375f507b244a967407facbd14b  MAIL.DBF
  • 全表扫描一次
select ROWID,* from MAIL_F33.t_data_metric_202207 order by C_INSERT_TIME DESC
  • 日志里发生报错
2022-07-07 21:30:34 [ERROR] database P0000467096 main_thread  repair page(5, 0, 125) from mirror
2022-07-07 21:31:03 [INFO] database P0000467096 main_thread  ckpt info adjust: ckpt_lsn(4057326), ckpt_fil(0), ckpt_off(253860864), cur_lsn(4057326), next_seq(1544375), cur_free(253861376)
  • 检查md5
[root@FT10497 DAMENG]# md5sum MAIL.DBF 
8a99d433ed4a2f7d7f0eabcc5c78972e  MAIL.DBF

可以看出,镜像文件修复了一个坏页。不过两个数据文件并没有恢复到完全一样。

  • 再插入400W数据,使数据文件变满扩展
declare
begin
  for i in 1..4000000 loop
     INSERT INTO MAIL_F33.t_data_metric_202207
       (C_INST_ID, C_SUB_INST_ID, C_TASK_TIME, C_INSERT_TIME, C_METRIC_ID, C_DATA_VALUE, C_METRIC_STATUS, C_ERROR_MSG, C_DATA_VALUE_ADJUST, C_DATA_VALUE_EXPECT)
        VALUES(1000+i, 1000+i,1584323799+i , CURRENT_TIMESTAMP, '测试001', 9999.99, 'GREEN', 'test_err', 0, 1);
     if mod(i,2000)=0 then
      commit;
     end if;
  end loop;
end; 
select checkpoint(100);   --重要
  • 检查md5
[root@FT10497 DAMENG]# md5sum MAIL.DBF 
6efc3639b3030c9a5d1f0b98441cac6a  MAIL.DBF
[root@FT10497 DAMENG]# md5sum ../MIR4.dbf 
6efc3639b3030c9a5d1f0b98441cac6a  ../MIR4.dbf

可以看出,数据文件恢复了完全一致。目前不清楚是checkpoint(100);的效果,还是数据文件撑满扩展了的原因。

  • 再构造稍微大一点儿的损坏
dd if=/dev/zero of=MAIL.DBF bs=16 count=1 seek=1024 conv=notrunc
dd if=/dev/zero of=MAIL.DBF bs=16 count=1 seek=2048 conv=notrunc
dd if=/dev/zero of=MAIL.DBF bs=16 count=1 seek=4096 conv=notrunc
  • 检查md5
[root@FT10497 DAMENG]# md5sum ../MIR4.dbf 
6efc3639b3030c9a5d1f0b98441cac6a  ../MIR4.dbf
[root@FT10497 DAMENG]# md5sum MAIL.DBF 
537c5e84e07bfacd8c84bb9b63e807b9  MAIL.DBF
  • 全表扫描一次
select ROWID,* from MAIL_F33.t_data_metric_202207 order by C_INSERT_TIME DESC;
  • 这次日志里没有发生报错
2022-07-07 21:35:52 [INFO] database P0000467096 main_thread  ckpt info adjust: ckpt_lsn(8075268), ckpt_fil(1), ckpt_off(228595200), cur_lsn(8075722), next_seq(3068133), cur_free(228731392)
2022-07-07 21:37:04 [INFO] database P0000660900 main_thread  ckpt info adjust: ckpt_lsn(61647452), ckpt_fil(1), ckpt_off(22856192), cur_lsn(61647452), next_seq(39365633), cur_free(22856192)
2022-07-07 21:40:52 [INFO] database P0000467096 main_thread  ckpt info adjust: ckpt_lsn(8075722), ckpt_fil(1), ckpt_off(228731392), cur_lsn(8075722), next_seq(3068133), cur_free(228731392)
  • 再插入100W数据,使数据文件变满扩展
declare
begin
  for i in 1..1000000 loop
     INSERT INTO MAIL_F33.t_data_metric_202207
       (C_INST_ID, C_SUB_INST_ID, C_TASK_TIME, C_INSERT_TIME, C_METRIC_ID, C_DATA_VALUE, C_METRIC_STATUS, C_ERROR_MSG, C_DATA_VALUE_ADJUST, C_DATA_VALUE_EXPECT)
        VALUES(1000+i, 1000+i,1584323799+i , CURRENT_TIMESTAMP, '测试001', 9999.99, 'GREEN', 'test_err', 0, 1);
     if mod(i,2000)=0 then
      commit;
     end if;
  end loop;
end;
select checkpoint(100);   --重要
  • 日志里发生报错
2022-07-07 21:40:52 [INFO] database P0000467096 main_thread  ckpt info adjust: ckpt_lsn(8075722), ckpt_fil(1), ckpt_off(228731392), cur_lsn(8075722), next_seq(3068133), cur_free(228731392)
2022-07-07 21:42:04 [INFO] database P0000660900 main_thread  ckpt info adjust: ckpt_lsn(61647452), ckpt_fil(1), ckpt_off(22856192), cur_lsn(61647452), next_seq(39365633), cur_free(22856192)
2022-07-07 21:42:37 [INFO] database P0000467096 main_thread  ckpt info adjust: ckpt_lsn(8075722), ckpt_fil(1), ckpt_off(228731392), cur_lsn(8075722), next_seq(3068133), cur_free(228731392)
2022-07-07 21:45:41 [ERROR] database P0000467096 main_thread  repair page(5, 0, 0) from mirror
2022-07-07 21:45:53 [INFO] database P0000467096 main_thread  rfil_grp_file_switch while flush rlog_buf[8280001, 8280657]
2022-07-07 21:45:53 [INFO] database P0000467096 main_thread  rfil file switch from temp/DAMENG/DAMENG02.log to temp/DAMENG/DAMENG01.log, cur_lsn:[8280660], next_seq:[3145821]
  • 检查md5
[root@FT10497 DAMENG]# md5sum MAIL.DBF 
fbc18fabbf2400bc196bd03cf14e1116  MAIL.DBF
[root@FT10497 DAMENG]# md5sum ../MIR4.dbf 
ba94de0b23e932e87f2a93d50df333a0  ../MIR4.dbf

可以看出,镜像文件又修复了一个坏页。不过这次两个数据文件无法恢复到完全一样了。

  • 结论
    对于数据文件损坏的场景,镜像文件可以起到一定的修复作用。但至少在page_check=1的情况下,无法做到完全修复。而且似乎,不能确定修复前,数据损坏的具体情况,只能看到修复的结果。

2.3.2 构造镜像文件损坏在场景

  • 停库后,先用镜像文件修复数据
[root@FT10497 DAMENG]# cp ../MIR4.dbf MAIL.DBF 
cp:是否覆盖'MAIL.DBF'? y
  • 随机向数据文件里写了几个0
[root@FT10497 DAMENG]# dd if=/dev/zero of=../MIR4.dbf bs=4 count=1 seek=1024 conv=notrunc
记录了1+0 的读入
记录了1+0 的写出
4 bytes copied, 0.00016722 s, 23.9 kB/s
[root@FT10497 DAMENG]# dd if=/dev/zero of=../MIR4.dbf bs=4 count=1 seek=2048 conv=notrunc
记录了1+0 的读入
记录了1+0 的写出
4 bytes copied, 0.00016044 s, 24.9 kB/s
[root@FT10497 DAMENG]# dd if=/dev/zero of=../MIR4.dbf bs=4 count=1 seek=4096 conv=notrunc
记录了1+0 的读入
记录了1+0 的写出
4 bytes copied, 0.00016818 s, 23.8 kB/s
  • 检查md5
[root@FT10497 DAMENG]# md5sum MAIL.DBF 
7d9727d15e117db3a05a6a37326783b3  MAIL.DBF
[root@FT10497 DAMENG]# md5sum ../MIR4.dbf 
1535a71d52af3f74d830dfab1ce89642  ../MIR4.dbf
  • 全表扫描一次
select ROWID,* from MAIL_F33.t_data_metric_202207 order by C_INSERT_TIME DESC;
  • 再插入200W数据,使数据文件变满扩展
declare
begin
  for i in 1..2000000 loop
     INSERT INTO MAIL_F33.t_data_metric_202207
       (C_INST_ID, C_SUB_INST_ID, C_TASK_TIME, C_INSERT_TIME, C_METRIC_ID, C_DATA_VALUE, C_METRIC_STATUS, C_ERROR_MSG, C_DATA_VALUE_ADJUST, C_DATA_VALUE_EXPECT)
        VALUES(1000+i, 1000+i,1584323799+i , CURRENT_TIMESTAMP, '测试001', 9999.99, 'GREEN', 'test_err', 0, 1);
     if mod(i,2000)=0 then
      commit;
     end if;
  end loop;
end;   
select checkpoint(100);
  • 日志里没有动静
2022-07-07 22:25:14 [INFO] database P0000482380 main_thread  ckpt info adjust: ckpt_lsn(9098242), ckpt_fil(0), ckpt_off(156558848), cur_lsn(9098242), next_seq(3451451), cur_free(156558848)
2022-07-07 22:27:04 [INFO] database P0000660900 main_thread  ckpt info adjust: ckpt_lsn(61647452), ckpt_fil(1), ckpt_off(22856192), cur_lsn(61647452), next_seq(39365633), cur_free(22856192)
2022-07-07 22:30:14 [INFO] database P0000482380 main_thread  ckpt info adjust: ckpt_lsn(9098242), ckpt_fil(0), ckpt_off(156558848), cur_lsn(9098242), next_seq(3451451), cur_free(156558848)
  • 检查md5
[root@FT10497 DAMENG]# md5sum ../MIR4.dbf 
2b59c1ea80575cdb620aef8c7c9ef372  ../MIR4.dbf
[root@FT10497 DAMENG]# md5sum MAIL.DBF 
3c4406278930600fc02e654f5d1b3c62  MAIL.DBF
  • 结论
    镜像文件损坏似乎没有影响。

2.3.3 数据文件和镜像文件都损坏在场景

  • 向数据文件里相同位置写了几个0,为了情况足够明显,多写了一些
dd if=/dev/zero of=MAIL.DBF bs=256 count=1 seek=1024 conv=notrunc
dd if=/dev/zero of=MAIL.DBF bs=256 count=1 seek=102400 conv=notrunc
dd if=/dev/zero of=MAIL.DBF bs=256 count=1 seek=1024000 conv=notrunc
cp MAIL.DBF ../MIR4.dbf
  • 全表扫描一次
select ROWID,* from MAIL_F33.t_data_metric_202207 order by C_INSERT_TIME DESC;
  • 数据库日志报错并挂起
2022-07-07 23:56:41 [ERROR] database P0000504428 main_thread  xdec_move_from_nrec error. len:0, value:{0}
2022-07-07 23:56:41 [ERROR] database P0000504428 main_thread  xdec_move_from_nrec error. len:0, value:{0}
2022-07-07 23:56:41 [ERROR] database P0000504428 main_thread  xdec_move_from_nrec error. len:0, value:{0}
2022-07-07 23:56:41 [ERROR] database P0000504428 main_thread  xdec_move_from_nrec error. len:0, value:{0}
2022-07-07 23:56:41 [ERROR] database P0000504428 main_thread  xdec_move_from_nrec error. len:0, value:{0}
2022-07-07 23:56:41 [ERROR] database P0000504428 main_thread  xdec_move_from_nrec error. len:0, value:{0}
2022-07-07 23:56:41 [ERROR] database P0000504428 main_thread  xdec_move_from_nrec error. len:0, value:{0}
2022-07-07 23:56:41 [ERROR] database P0000504428 main_thread  xdec_move_from_nrec error. len:0, value:{0}
2022-07-07 23:56:41 [FATAL] database P0000504428 main_thread  fil_io try to R file(0, 0) beyond size. path:temp/DAMENG/SYSTEM.DBF size:4736 offset:20224
2022-07-07 23:56:41 [FATAL] database P0000504428 main_thread  dm_sys_halt now!!!
2022-07-07 23:56:41 [INFO] database P0000504428 main_thread  total 2 rfil opened!
  • 如果只写入数据文件,不写入镜像文件的话
dd if=/dev/zero of=MAIL.DBF bs=256 count=1 seek=1024 conv=notrunc
dd if=/dev/zero of=MAIL.DBF bs=256 count=1 seek=102400 conv=notrunc
dd if=/dev/zero of=MAIL.DBF bs=256 count=1 seek=1024000 conv=notrunc
  • 全表扫描一次
select ROWID,* from MAIL_F33.t_data_metric_202207 order by C_INSERT_TIME DESC;
  • 日志情况
2022-07-07 23:52:04 [INFO] database P0000660900 main_thread  ckpt info adjust: ckpt_lsn(61647452), ckpt_fil(1), ckpt_off(22856192), cur_lsn(61647452), next_seq(39365633), cur_free(22856192)
2022-07-07 23:54:53 [INFO] database P0000504428 main_thread  ckpt info adjust: ckpt_lsn(16148362), ckpt_fil(1), ckpt_off(180910592), cur_lsn(16148362), next_seq(6120413), cur_free(180910592)
2022-07-07 23:55:30 [ERROR] database P0000504428 main_thread  repair page(5, 0, 800) from mirror
2022-07-07 23:55:34 [ERROR] database P0000504428 main_thread  repair page(5, 0, 8000) from mirror
评论
后发表回复

作者

文章

阅读量

获赞

扫一扫
联系客服