Luminous版本CephFS的file location

栏目: 服务器 · 发布时间: 6年前

内容简介:在Ceph Luminous里部署了CephFS,想查看上面的一个file到Rados object,再到OSDs的映射关系。之前在Ceph Jewel版本里,有个cephfs工具,可以通过 show_location 获取file的location信息;但该工具运行时会输出WARNING,提示它已被弃用(deprecated),而在Ceph Luminous里没找到相关的替换命令……

在Ceph Luminous里部署了CephFS,想查看上面的一个file到Rados,再到OSDs的映射关系。

之前在Ceph Jewel版本里,有个cephfs的工具,可以获取file的location信息,如下:

# cephfs /mnt/tstfs2/mike512K/tstfile show_location
WARNING: This tool is deprecated.  Use the layout.* xattrs to query and modify layouts.
location.file_offset:  0				// file的偏移
location.object_offset:0				// object的偏移
location.object_no:    0				// object的number
location.object_size:  4194304          // object size为4M
location.object_name:  10000002356.00000000 // object的name
location.block_offset: 0				// block的偏移
location.block_size:   524288	        // block size为512k
location.osd:          0				// 存储在osd 0 上

但如上面的WARNING所述,这个命令已被弃用(deprecated),而在Ceph Luminous里也没找到相关的替换命令……

在Ceph的官方文档里也没找到相关说法: http://docs.ceph.com/docs/master/cephfs/file-layouts/

那只能自己看代码分析了 ;(

代码分析

Jewel版本

在Jewel版本里是有 cephfs 这个工具的,那先看看它是如何工作的。

文件:src/cephfs.cc
int main (int argc, char **argv)
{
...
    if (CMD_SHOW_LAYOUT == cmd) {
    ...
    } else if (CMD_SHOW_LOC == cmd) {
        struct ceph_ioctl_dataloc location;
        location.file_offset = file_offset;
        err = ioctl(fd, CEPH_IOC_GET_DATALOC, (unsigned long)&location);
        if (err) {
            cerr << "Error getting location: " << cpp_strerror(err) << endl;
            return 1;
        }
        cout << "location.file_offset:  " << location.file_offset << endl;
        cout << "location.object_offset:" << location.object_offset << endl;
        cout << "location.object_no:    " << location.object_no << endl;
        cout << "location.object_size:  " << location.object_size << endl;
        cout << "location.object_name:  " << location.object_name << endl;
        cout << "location.block_offset: " << location.block_offset << endl;
        cout << "location.block_size:   " << location.block_size << endl;
        cout << "location.osd:          " << location.osd << endl;
//    cout << "osd address:           " << location.osd_addr << endl;
    }

上面代码段就是与命令 cephfs <file_path> show_location 相关的代码。

文件:src/client/ioctl.h
#define CEPH_IOC_GET_DATALOC _IOWR(CEPH_IOCTL_MAGIC, 3, \
                    struct ceph_ioctl_dataloc)

查看ceph代码里的ioctl,如下,这里只是 ceph-fuse 客户端实现:

文件:src/client/fuse_ll.cc
#ifdef FUSE_IOCTL_COMPAT
static void fuse_ll_ioctl(fuse_req_t req, fuse_ino_t ino, int cmd, void *arg, struct fuse_file_info *fi,
                          unsigned flags, const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
...
    switch(cmd) {
    case CEPH_IOC_GET_LAYOUT: {
        file_layout_t layout;
        struct ceph_ioctl_layout l;
        Fh *fh = (Fh*)fi->fh;
        cfuse->client->ll_file_layout(fh, &layout);
        l.stripe_unit = layout.stripe_unit;
        l.stripe_count = layout.stripe_count;
        l.object_size = layout.object_size;
        l.data_pool = layout.pool_id;
        fuse_reply_ioctl(req, 0, &l, sizeof(struct ceph_ioctl_layout));
    }
    break;
    default:
        fuse_reply_err(req, EINVAL);
    }
}

可以看到,ceph-fuse仅仅支持 CEPH_IOC_GET_LAYOUT 这一个ioctl命令,其它命令(包括 CEPH_IOC_GET_DATALOC)都会返回 EINVAL。

查看 Linux 的kernel代码,看相关cephfs的ioctl部分:

文件:fs/ceph/ioctl.c
/*
 * Return object name, size/offset information, and location (OSD
 * number, network address) for a given file offset.
 */
static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
{
    struct ceph_ioctl_dataloc dl;
    struct inode *inode = file_inode(file);
...
    dl.file_offset -= dl.object_offset;
    dl.object_size = ci->i_layout.object_size;
    dl.block_size = ci->i_layout.stripe_unit;

    /* block_offset = object_offset % block_size */
    tmp = dl.object_offset;
    dl.block_offset = do_div(tmp, dl.block_size);

    snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx",
         ceph_ino(inode), dl.object_no);
...
}

可以看出,file的location信息是根据其 layout 信息和 inode 号按固定规则计算出来的:object的名字为 <16进制的inode号>.<8位16进制的object编号>(格式串为 "%llx.%08llx")。

Luminous版本

在Luminous版本里,没有找到 src/cephfs.cc 文件,那就查查其它相关代码。

想一想:对一个file的哪些操作需要获取它到Rados的映射信息?首先想到的就是read/write,那就看Ceph Luminous版本里的相关代码吧~

文件:src/client/Client.cc
int Client::_write(Fh *f, int64_t offset, uint64_t size, const char *buf,
                   const struct iovec *iov, int iovcnt)
{
...
        // async, caching, non-blocking.
        r = objectcacher->file_write(&in->oset, &in->layout,
                                     in->snaprealm->get_snap_context(),
                                     offset, size, bl, ceph::real_clock::now(),
                                     0);
...
}

文件:src/osdc/ObjectCacher.h
class ObjectCacher {
...
    int file_write(ObjectSet *oset, file_layout_t *layout,
                   const SnapContext& snapc, loff_t offset, uint64_t len,
                   bufferlist& bl, ceph::real_time mtime, int flags) {
        OSDWrite *wr = prepare_write(snapc, bl, mtime, flags, 0);
        Striper::file_to_extents(cct, oset->ino, layout, offset, len,
                                 oset->truncate_size, wr->extents);
        return writex(wr, oset, NULL);
    }
...
};

文件:src/osdc/Striper.h
class Striper {
...
    static void file_to_extents(CephContext *cct, inodeno_t ino,
                                const file_layout_t *layout,
                                uint64_t offset, uint64_t len,
                                uint64_t trunc_size,
                                vector<ObjectExtent>& extents) {
        // generate prefix/format
        char buf[32];
        snprintf(buf, sizeof(buf), "%llx.%%08llx", (long long unsigned)ino);

        file_to_extents(cct, buf, layout, offset, len, trunc_size, extents);
    }
...
};
可以看出file到extents转换时,object名字的格式为:<16进制的ino>.%08llx(代码里的 %%08llx 是转义写法,经过这一次snprintf后变成 %08llx,应是留给后面填入object编号的占位符)。

也就是说在CephFS中file到Rados里object的映射关系如下。

object命名规则: <file inode number(16进制)>.<slice number(即object编号,8位16进制,不足补0)>

验证

root@ceph0:/mnt/cephfs# dd if=/dev/zero of=4Mfile bs=4M count=1
1+0 records in
1+0 records out
4194304 bytes (4.2 MB, 4.0 MiB) copied, 0.00866722 s, 484 MB/s
root@ceph0:/mnt/cephfs# ll -ih
total 4.1M
            1 drwxr-xr-x 1 root root  40G Jun  7 17:33 ./
     15466497 drwxr-xr-x 3 root root 4.0K Jun  4 15:19 ../
1099511628901 -rw-r--r-- 1 root root 4.0M Jun  7 17:33 4Mfile

root@ceph0:/mnt/cephfs# stat 4Mfile
  File: '4Mfile'
  Size: 4194304   	Blocks: 8192       IO Block: 4194304 regular file
Device: 10006bh/1048683d	Inode: 1099511628901  Links: 1
Access: (0644/-rw-r--r--)  Uid: (    0/    root)   Gid: (    0/    root)
Access: 2018-06-07 17:33:12.451473976 +0800
Modify: 2018-06-07 17:44:11.141674057 +0800
Change: 2018-06-07 17:44:11.141674057 +0800
 Birth: -

1099511628901 转换为16进制为: 0x10000000465

查看文件的layout信息:

root@ceph0:/mnt/cephfs# getfattr -n ceph.file.layout 4Mfile
# file: 4Mfile
ceph.file.layout="stripe_unit=4194304 stripe_count=1 object_size=4194304 pool=cephfs_data"

查看Rados里的object和其map信息:

root@ceph0:/mnt/cephfs# rados ls -p cephfs_data | grep -i 10000000465
10000000465.00000000
root@ceph0:/mnt/cephfs# rados -p cephfs_data stat 10000000465.00000000
cephfs_data/10000000465.00000000 mtime 2018-06-07 17:33:12.000000, size 4194304

root@ceph0:/mnt/cephfs# ceph osd map cephfs_data 10000000465.00000000
osdmap e5770 pool 'cephfs_data' (2) object '10000000465.00000000' -> pg 2.3137aa5e (2.5e) -> up ([2,6], p2) acting ([2,6], p2)

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持 码农网

查看所有标签

猜你喜欢:

本站部分资源来源于网络,本站转载出于传递更多信息之目的,版权归原作者或者来源机构所有,如转载稿涉及版权问题,请联系我们

Big Java Late Objects

Big Java Late Objects

Horstmann, Cay S. / 2012-2 / 896.00元

The introductory programming course is difficult. Many students fail to succeed or have trouble in the course because they don't understand the material and do not practice programming sufficiently. ......一起来看看 《Big Java Late Objects》 这本书的介绍吧!

SHA 加密
SHA 加密

SHA 加密工具

UNIX 时间戳转换
UNIX 时间戳转换

UNIX 时间戳转换

正则表达式在线测试
正则表达式在线测试

正则表达式在线测试