2.2 IO所在磁盘异常

这里指下发IO之前磁盘已经异常。当IO所在磁盘异常时,无法从该磁盘上直接读取数据,需要读取同条带的其他磁盘数据,然后经过异或运算还原出当前要读的磁盘的数据。该流程会经过以下四轮的条带处理,读取成功后将数据返回给调用者。
2.2.1 下发读请求函数调用关系如下:
handle_stripe() -> analyse_stripe() / handle_stripe_fill() -> fetch_block() / ops_run_io()

各函数执行的代码逻辑如下:
static void handle_stripe(struct stripe_head *sh){ /* 解析条带 */ analyse_stripe(sh, &s); /* 满足s.to_read条件进入handle_stripe_fill */ if (s.to_read || s.non_overwrite|| (conf->level == 6 && s.to_write && s.failed)|| (s.syncing && (s.uptodate + s.compute < disks))|| s.replacing|| s.expanding)handle_stripe_fill(sh, &s, disks); /* 调用ops_run_io检查是否有请求需要下发 */ ops_run_io(sh, &s);}static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s){ rcu_read_lock(); for (i = disks; i--; ) {dev = &sh->dev[i];/* 统计有读请求的dev */if (dev->toread)s->to_read++;/* 磁盘异常rdev设置为NULL */if (rdev && test_bit(Faulty, &rdev->flags))rdev = NULL;/* 清除条带/设备同步状态标记 */clear_bit(R5_Insync, &dev->flags);if (!test_bit(R5_Insync, &dev->flags)) {/* 记录异常磁盘索引 */if (s->failed < 2)s->failed_num[s->failed] = i;/* 统计异常dev */s->failed++;} } rcu_read_unlock();}static void handle_stripe_fill(struct stripe_head *sh,struct stripe_head_state *s,int disks){ int i; /* 当前条带状态没有设置标记,满足条件判断进入if */ if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&!sh->reconstruct_state)for (i = disks; i--; )if (fetch_block(sh, s, i, disks))break; set_bit(STRIPE_HANDLE, &sh->state);}static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,int disk_idx, int disks){ /* 对于有读请求但磁盘异常的dev满足该条件进入到if */ if (!test_bit(R5_LOCKED, &dev->flags) &&!test_bit(R5_UPTODATE, &dev->flags) &&(dev->toread ||(s->failed >= 1 && fdev[0]->toread))) {/** 此时s.uptodate为0所以只能进入到最后的else if* 由于异常磁盘对应的dev无R5_Insync标记所以异常磁盘对应的dev什么都没做* 其他磁盘设置R5_LOCKED和R5_Wantread标记准备下发读请求*/if (test_bit(R5_Insync, &dev->flags)) {set_bit(R5_LOCKED, &dev->flags);set_bit(R5_Wantread, &dev->flags);s->locked++;pr_debug("Reading block %d (sync=%d)\n",disk_idx, s->syncing);} } return 0;}static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s){ struct r5conf *conf = sh->raid_conf; int i, disks = sh->disks; might_sleep(); for (i = disks; i--; ) {bi = &sh->dev[i].req;rbi = &sh->dev[i].rreq; /* For writing to replacement 
*/rcu_read_lock();rrdev = rcu_dereference(conf->disks[i].replacement);smp_mb(); /* Ensure that if rrdev is NULL, rdev won't be */rdev = rcu_dereference(conf->disks[i].rdev);if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) {if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags))rw = WRITE_FUA;elserw = WRITE;if (test_bit(R5_Discard, &sh->dev[i].flags))rw |= REQ_DISCARD;} else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))/* 设置为请求类型为读 */rw = READ;else if (test_and_clear_bit(R5_WantReplace,&sh->dev[i].flags)) {rw = WRITE;replace_only = 1;} else/* 其余跳过 */continue;if (rdev) {set_bit(STRIPE_IO_STARTED, &sh->state);/** 设置bio参数* 包括重新设置bio指向的块设备,起始位置,IO完成回调函数*/bio_reset(bi);bi->bi_bdev = rdev->bdev;bi->bi_rw = rw;bi->bi_end_io = (rw & WRITE)? raid5_end_write_request: raid5_end_read_request;bi->bi_private = sh;atomic_inc(&sh->count);if (use_new_offset(conf, sh))bi->bi_sector = (sh->sector + rdev->new_data_offset);elsebi->bi_sector = (sh->sector + rdev->data_offset);if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))bi->bi_rw |= REQ_FLUSH;bi->bi_vcnt = 1;bi->bi_io_vec[0].bv_len = STRIPE_SIZE;bi->bi_io_vec[0].bv_offset = 0;bi->bi_size = STRIPE_SIZE;if (rrdev)set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);/* 调用generic_make_request向底层块设备提交请求 */generic_make_request(bi);} }}
经验总结

扩展阅读:
- RAID5 IO处理之写请求代码详解
- RAID5 IO处理之重构代码详解
- RAID5 IO处理之replace代码详解
- Linux Block模块之deadline调度算法代码解析
- Linux Block模块之IO合并代码解析
- 电视剧伪钞者之末路剧情介绍?
- 电视剧伪钞者之末路结局是什么?
- 【强烈推荐】用glob库的一行命令显著加速批量读取处理数据
- redis bitmap数据结构之java对等操作
- 一加9r参数_一加9r搭载什么处理器
