3 条带处理
我们依旧通过分析条带各轮次的处理，来解析重构过程中的代码执行流程及IO发生的情况。
3.1 下发读请求
函数调用关系:
handle_stripe()
 \_ analyse_stripe()
 \_ handle_stripe_fill()
     \_ fetch_block()
 \_ ops_run_io()
代码逻辑如下:
static void handle_stripe(struct stripe_head *sh){ /* 在sync_request中设置了该标记 */ if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {spin_lock(&sh->stripe_lock);/* 此时条带不是处理DISCARD请求 */if (!test_bit(STRIPE_DISCARD, &sh->state)/* 清掉STRIPE_SYNC_REQUESTED标记 */&& test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {/* 设置条带同步中标记 */set_bit(STRIPE_SYNCING, &sh->state);/* 清除条带一致状态的标记 */clear_bit(STRIPE_INSYNC, &sh->state);}spin_unlock(&sh->stripe_lock); } clear_bit(STRIPE_DELAYED, &sh->state); /* 解析条带状态 */ analyse_stripe(sh, &s); /* s.syncing为真且第一轮条带处理时s.uptodate + s.compute等于0条件满足进入handle_stripe_fill */ if (s.to_read || s.non_overwrite|| (conf->level == 6 && s.to_write && s.failed)|| (s.syncing && (s.uptodate + s.compute < disks))|| s.replacing|| s.expanding)handle_stripe_fill(sh, &s, disks); /* 此时 s.locked == 0 条件不成立不会进入该if分支 */ if ((s.syncing || s.replacing) && s.locked == 0&& test_bit(STRIPE_INSYNC, &sh->state)) {md_done_sync(conf->mddev, STRIPE_SECTORS, 1);clear_bit(STRIPE_SYNCING, &sh->state);if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))wake_up(&conf->wait_for_overlap); } /* 下发读请求 */ ops_run_io(sh, &s);}static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s){ int do_recovery = 0; /* 遍历所有条带/设备 */ rcu_read_lock(); for (i=disks; i--; ) {/* 新加入的成员磁盘重构完成之前不处于同步状态,满足if条件 */if (!test_bit(R5_Insync, &dev->flags)) {/* 加上raid6在内最大支持坏2块磁盘 */if (s->failed < 2)s->failed_num[s->failed] = i;/* 自增failed */s->failed++;/* rdev指向新盘且新盘不是Faulty状态(旧盘是),满足if条件设置do_recovery */if (rdev && !test_bit(Faulty, &rdev->flags))do_recovery = 1;} } /* 在handle_stripe中设置了该标记 */ if (test_bit(STRIPE_SYNCING, &sh->state)) {/* do_recovery条件满足,设置 s->syncing = 1 表明条带在做重构 */if (do_recovery|| sh->sector >= conf->mddev->recovery_cp|| test_bit(MD_RECOVERY_REQUESTED, &(conf->mddev->recovery)))s->syncing = 1;elses->replacing = 1; } rcu_read_unlock();}static void handle_stripe_fill(struct stripe_head *sh,struct stripe_head_state *s,int disks){ int i; /* 未设置条带状态进入fetch_block */ 
if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state&& !sh->reconstruct_state)for (i = disks; i--; )if (fetch_block(sh, s, i, disks))break; set_bit(STRIPE_HANDLE, &sh->state);}static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,int disk_idx, int disks){ struct r5dev *dev = &sh->dev[disk_idx]; struct r5dev *fdev[2] = { &sh->dev[s->failed_num[0]],&sh->dev[s->failed_num[1]] }; /* 此时所有条带/设备都未发起请求且未包含最新数据 */ /* 满足s->syncing条件进入第一层if */ if (!test_bit(R5_LOCKED, &dev->flags)&& !test_bit(R5_UPTODATE, &dev->flags)&& (dev->toread|| (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags))|| s->syncing || s->expanding|| (s->replacing && want_replace(sh, disk_idx))|| (s->failed >= 1 && fdev[0]->toread)|| (s->failed >= 2 && fdev[1]->toread)|| (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite&& !test_bit(R5_OVERWRITE, &fdev[0]->flags))|| (sh->raid_conf->level == 6 && s->failed && s->to_write))) {/* we would like to get this block, possibly by computing it,* otherwise read it if the backing disk is insync*/BUG_ON(test_bit(R5_Wantcompute, &dev->flags));BUG_ON(test_bit(R5_Wantread, &dev->flags));/** 对所有正常可读的成员磁盘下发读请求* 需要注意的是,如果是raid5,因为只有一个冗余,因此重构是需要向所有其他磁盘下发读的* 但是如果是raid6,因为有两个冗余,在只有一个成员磁盘异常的情况下* 可以少读一块盘,但是实际没有这么做还是都读了,在后续处理中会用* 计算出来的值和读出来的值进行比较如果不相等则重新写一次进行修复*/if (test_bit(R5_Insync, &dev->flags)) {set_bit(R5_LOCKED, &dev->flags);set_bit(R5_Wantread, &dev->flags);/* 自增locked计数 */s->locked++;} } return 0;}static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s){ /* 遍历所有条带/设备 */ for (i = disks; i--; ) {/* 对设置了读标记的下发读请求 */if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))rw = READ;/* 跳过其他不需要读的设备 */elsecontinue;if (rdev) {bio_reset(bi);bi->bi_bdev = rdev->bdev;bi->bi_rw = rw;bi->bi_end_io = raid5_end_read_request;bi->bi_private = sh;atomic_inc(&sh->count);if (use_new_offset(conf, sh))bi->bi_sector = (sh->sector + rdev->new_data_offset);elsebi->bi_sector = (sh->sector + rdev->data_offset);if (test_bit(R5_ReadNoMerge, 
&sh->dev[i].flags))bi->bi_rw |= REQ_FLUSH;bi->bi_vcnt = 1;bi->bi_io_vec[0].bv_len = STRIPE_SIZE;bi->bi_io_vec[0].bv_offset = 0;bi->bi_size = STRIPE_SIZE;/* 提交bio */generic_make_request(bi);} }}
经验总结扩展阅读
- RAID5 IO处理之写请求代码详解
- RAID5 IO处理之replace代码详解
- Linux Block模块之deadline调度算法代码解析
- Linux Block模块之IO合并代码解析
- 电视剧伪钞者之末路剧情介绍?
- 电视剧伪钞者之末路结局是什么?
- 【强烈推荐】用glob库的一行命令显著加速批量读取处理数据
- redis bitmap数据结构之java对等操作
- 一加9r参数_一加9r搭载什么处理器
- 哪些星座情侣配对最容易闹掰