RAID5 IO Handling: Reconstruction Code in Detail (Part 4)

3.3 Issuing the write request

Function call chain:
handle_stripe()
 \_ analyse_stripe()
 \_ handle_parity_checks5()
 \_ ops_run_io()

The code logic is as follows:
static void handle_stripe(struct stripe_head *sh)
{
    /* excerpt: declarations and unrelated paths are elided */

    /* Parse the stripe state */
    analyse_stripe(sh, &s);

    /* uptodate now equals disks, so this if is no longer entered */
    if (s.to_read || s.non_overwrite ||
        (conf->level == 6 && s.to_write && s.failed) ||
        (s.syncing && (s.uptodate + s.compute < disks)) ||
        s.replacing ||
        s.expanding)
        handle_stripe_fill(sh, &s, disks);

    /*
     * s.syncing is true, s.locked is 0, STRIPE_COMPUTE_RUN has been
     * cleared, and STRIPE_INSYNC is not yet set, so we enter
     * handle_parity_checks5()
     */
    if (sh->check_state ||
        (s.syncing && s.locked == 0 &&
         !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
         !test_bit(STRIPE_INSYNC, &sh->state))) {
        if (conf->level == 6)
            handle_parity_checks6(conf, sh, &s, disks);
        else
            handle_parity_checks5(conf, sh, &s, disks);
    }

    /* handle_parity_checks5() incremented locked, so this if is skipped */
    if ((s.syncing || s.replacing) && s.locked == 0 &&
        test_bit(STRIPE_INSYNC, &sh->state)) {
        md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
        clear_bit(STRIPE_SYNCING, &sh->state);
        if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
            wake_up(&conf->wait_for_overlap);
    }

    /* Issue the write request */
    ops_run_io(sh, &s);
}

static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh,
                                  struct stripe_head_state *s, int disks)
{
    struct r5dev *dev = NULL;

    set_bit(STRIPE_HANDLE, &sh->state);

    switch (sh->check_state) {
    case check_state_idle:
        /* analyse_stripe() still sees the dev as out of sync, so failed is nonzero */
        if (s->failed == 0) {
            BUG_ON(s->uptodate != disks);
            sh->check_state = check_state_run;
            set_bit(STRIPE_OP_CHECK, &s->ops_request);
            clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
            s->uptodate--;
            break;
        }
        /* Point dev at the failed disk */
        dev = &sh->dev[s->failed_num[0]];
        /* fall through */
    case check_state_compute_result:
        sh->check_state = check_state_idle;
        /* If the stripe is already marked in sync, we are done */
        if (test_bit(STRIPE_INSYNC, &sh->state))
            break;
        /*
         * Reconstruction has finished, so the failed dev must hold
         * up-to-date data and uptodate must equal the number of disks
         */
        BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
        BUG_ON(s->uptodate != disks);
        /* Lock the dev that needs the write and mark it want-write */
        set_bit(R5_LOCKED, &dev->flags);
        s->locked++;
        set_bit(R5_Wantwrite, &dev->flags);
        /* Clear the stripe's degraded flag */
        clear_bit(STRIPE_DEGRADED, &sh->state);
        /* Mark the stripe as in sync */
        set_bit(STRIPE_INSYNC, &sh->state);
        break;
    }
}

static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
{
    /* Iterate over the stripe's devices */
    for (i = disks; i--; ) {
        /* Set the IO direction to write */
        if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) {
            if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags))
                rw = WRITE_FUA;
            else
                rw = WRITE;
            if (test_bit(R5_Discard, &sh->dev[i].flags))
                rw |= REQ_DISCARD;
        }
        if (rdev) {
            bio_reset(bi);
            bi->bi_bdev = rdev->bdev;
            bi->bi_rw = rw;
            bi->bi_end_io = raid5_end_write_request;
            bi->bi_private = sh;
            atomic_inc(&sh->count);
            if (use_new_offset(conf, sh))
                bi->bi_sector = (sh->sector + rdev->new_data_offset);
            else
                bi->bi_sector = (sh->sector + rdev->data_offset);
            /* REQ_FLUSH also keeps this bio from being merged at the block layer */
            if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
                bi->bi_rw |= REQ_FLUSH;
            bi->bi_vcnt = 1;
            bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
            bi->bi_io_vec[0].bv_offset = 0;
            bi->bi_size = STRIPE_SIZE;
            /* Submit the bio */
            generic_make_request(bi);
        }
    }
}
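The heart of handle_parity_checks5() during reconstruction is the check_state_compute_result branch: once the failed device's data has been computed, that device is locked and flagged R5_Wantwrite, and the stripe flips from degraded to in-sync. The following minimal user-space sketch models just this flag flow; the struct and enum names are simplified stand-ins for illustration, not the kernel's definitions:

#include <stdio.h>
#include <assert.h>

/* Hypothetical, simplified stand-ins for the kernel's flag words */
enum dev_flags    { R5_UPTODATE = 1 << 0, R5_LOCKED = 1 << 1, R5_Wantwrite = 1 << 2 };
enum stripe_flags { STRIPE_DEGRADED = 1 << 0, STRIPE_INSYNC = 1 << 1 };

struct r5dev_model  { unsigned flags; };
struct stripe_model { unsigned state; struct r5dev_model dev[5]; int locked; };

/* Models the check_state_compute_result branch of handle_parity_checks5() */
static void compute_result(struct stripe_model *sh, int failed_num,
                           int uptodate, int disks)
{
    struct r5dev_model *dev = &sh->dev[failed_num];

    if (sh->state & STRIPE_INSYNC)
        return;                             /* already in sync: nothing to do */

    assert(dev->flags & R5_UPTODATE);       /* reconstructed data must be present */
    assert(uptodate == disks);              /* every member now holds valid data */

    dev->flags |= R5_LOCKED | R5_Wantwrite; /* lock dev and mark it for writing */
    sh->locked++;
    sh->state &= ~STRIPE_DEGRADED;          /* stripe is no longer degraded */
    sh->state |= STRIPE_INSYNC;             /* mark stripe in sync */
}

int main(void)
{
    struct stripe_model sh = { .state = STRIPE_DEGRADED };
    int disks = 5, failed_num = 2;

    sh.dev[failed_num].flags = R5_UPTODATE; /* the compute step has finished */
    compute_result(&sh, failed_num, disks, disks);

    printf("locked=%d wantwrite=%d insync=%d\n", sh.locked,
           !!(sh.dev[failed_num].flags & R5_Wantwrite),
           !!(sh.state & STRIPE_INSYNC));
    return 0;
}

Running it prints locked=1 wantwrite=1 insync=1, which matches the state handle_stripe() expects when it falls through to ops_run_io() and issues the write to the rebuilt disk.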
3.4 Sync completion

Function call chain:

handle_stripe()
 \_ analyse_stripe()
 \_ md_done_sync()

The code logic is as follows:
static void handle_stripe(struct stripe_head *sh)
{
    /* Parse the stripe */
    analyse_stripe(sh, &s);

    /*
     * raid5_end_write_request() cleared the R5_LOCKED flag, so locked is
     * now 0; STRIPE_INSYNC was set in the previous round, so we enter
     * this if
     */
    if ((s.syncing || s.replacing) && s.locked == 0 &&
        test_bit(STRIPE_INSYNC, &sh->state)) {
        /* Run the completion handling for this stripe's sync */
        md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
        /* Clear the stripe's syncing flag */
        clear_bit(STRIPE_SYNCING, &sh->state);
    }
}

void md_done_sync(struct mddev *mddev, int blocks, int ok)
{
    /*
     * During reconstruction the number of submitted sectors is accumulated
     * into recovery_active; subtract the matching amount as each stripe
     * completes
     */
    atomic_sub(blocks, &mddev->recovery_active);
    /*
     * Reconstruction speed is throttled: when too many requests are in
     * flight, the sync thread sleeps on recovery_wait. Wake it up here
     */
    wake_up(&mddev->recovery_wait);
    /* ok is 1, so this if is not entered */
    if (!ok) {
        /* If reconstruction hit an error, interrupt it */
        set_bit(MD_RECOVERY_INTR, &mddev->recovery);
        set_bit(MD_RECOVERY_ERROR, &mddev->recovery);
        md_wakeup_thread(mddev->thread);
        // stop recovery, signal do_sync ....
    }
}
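The pairing between the sync thread and md_done_sync() is essentially a producer/consumer throttle on recovery_active. The sketch below models it with plain pthreads; MAX_ACTIVE, submit_sync_request() and done_sync() are hypothetical names invented for illustration, while the kernel actually uses atomic_add()/atomic_sub() on mddev->recovery_active plus the recovery_wait wait queue, with the speed limit enforced in md_do_sync():

#include <pthread.h>
#include <stdio.h>

#define MAX_ACTIVE 256   /* hypothetical in-flight window, in sectors */

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  recovery_wait = PTHREAD_COND_INITIALIZER;
static int recovery_active;   /* sectors submitted but not yet completed */

/* Sync-thread side: account for a stripe before submitting it */
static void submit_sync_request(int blocks)
{
    pthread_mutex_lock(&lock);
    while (recovery_active + blocks > MAX_ACTIVE)   /* throttle: too much in flight */
        pthread_cond_wait(&recovery_wait, &lock);
    recovery_active += blocks;
    pthread_mutex_unlock(&lock);
    /* ... issue the reconstruction reads/writes here ... */
}

/* Completion side: what md_done_sync() models */
static void done_sync(int blocks)
{
    pthread_mutex_lock(&lock);
    recovery_active -= blocks;              /* atomic_sub() in the kernel */
    pthread_cond_broadcast(&recovery_wait); /* wake_up(&mddev->recovery_wait) */
    pthread_mutex_unlock(&lock);
}

int main(void)
{
    submit_sync_request(8);  /* one stripe's worth of sectors */
    done_sync(8);            /* stripe completed: window drains, waiters wake */
    printf("recovery_active=%d\n", recovery_active);   /* prints 0 */
    return 0;
}

done_sync() mirrors the two lines that matter in md_done_sync(): subtract the completed blocks and wake any throttled submitter, so the sync thread can keep the reconstruction pipeline full without running unbounded.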
