Redis Sentinel集群双机房容灾实施步骤

栏目: 数据库 · 发布时间: 6年前

内容简介:概要目标:在双机房部署下,当任一个机房完全无法提供服务时,如何让Redis继续提供服务。架构设计……

概要目标

在双机房部署的情况下,当任一个机房完全无法提供服务时,如何让 Redis 继续提供服务。

架构设计

A、B两机房,其中A机房有一Master一Slave和两个Sentinel,B机房只有2个Slave(不部署Sentinel),如下图。

初始规划

A机房

192.168.71.213 S+哨兵

192.168.71.214 M+哨兵

B机房

192.168.70.214 S

192.168.70.215 S

目录创建

--redis软件目录

mkdir -p /home/redis

--pidfile文件存放目录

mkdir -p /home/redis/redisrun/

解压redis到 /home/redis

集群配置

【Master】

选择71.214作为Master

[root@node-71 redis]# vi /home/redis/redis.conf

#后台启动

daemonize yes

pidfile "/home/redis/redisrun/redis_6379.pid"

port 6379

timeout 0

tcp-keepalive 0

loglevel notice

logfile "/home/redis/redis.log"

databases 16

save 900 1

save 300 10

save 60 10000

stop-writes-on-bgsave-error yes

rdbcompression yes

rdbchecksum yes

dbfilename "dump.rdb"

dir "/home/redis/redisdb"

#如果做故障切换,不论主从节点都要填写密码且要保持一致

masterauth "123456"

slave-serve-stale-data yes

slave-read-only yes

repl-disable-tcp-nodelay no

slave-priority 98

#当前redis密码

requirepass "123456"

appendonly yes

# appendfsync always

appendfsync everysec

# appendfsync no

no-appendfsync-on-rewrite no

auto-aof-rewrite-percentage 100

auto-aof-rewrite-min-size 64mb

lua-time-limit 5000

slowlog-log-slower-than 10000

slowlog-max-len 128

notify-keyspace-events ""

hash-max-ziplist-entries 512

hash-max-ziplist-value 64

list-max-ziplist-entries 512

list-max-ziplist-value 64

set-max-intset-entries 512

zset-max-ziplist-entries 128

zset-max-ziplist-value 64

activerehashing yes

client-output-buffer-limit normal 0 0 0

client-output-buffer-limit slave 256mb 64mb 60

client-output-buffer-limit pubsub 32mb 8mb 60

hz 10

aof-rewrite-incremental-fsync yes

# Generated by CONFIG REWRITE

【Slave】

选择其余3个节点作为Slave

[root@node-71 redis]# vi /home/redis/redis.conf

daemonize yes

pidfile "/home/redis/redisrun/redis_6379.pid"

port 6379

timeout 0

tcp-keepalive 0

loglevel notice

logfile "/home/redis/redis.log"

databases 16

save 900 1

save 300 10

save 60 10000

stop-writes-on-bgsave-error yes

rdbcompression yes

rdbchecksum yes

dbfilename "dump.rdb"

dir "/home/redis/redisdb"

#主节点密码

masterauth "123456"

slave-serve-stale-data yes

slave-read-only yes

repl-disable-tcp-nodelay no

slave-priority 98

requirepass "123456"

appendonly yes

# appendfsync always

appendfsync everysec

# appendfsync no

no-appendfsync-on-rewrite no

auto-aof-rewrite-percentage 100

auto-aof-rewrite-min-size 64mb

lua-time-limit 5000

slowlog-log-slower-than 10000

slowlog-max-len 128

notify-keyspace-events ""

hash-max-ziplist-entries 512

hash-max-ziplist-value 64

list-max-ziplist-entries 512

list-max-ziplist-value 64

set-max-intset-entries 512

zset-max-ziplist-entries 128

zset-max-ziplist-value 64

activerehashing yes

client-output-buffer-limit normal 0 0 0

client-output-buffer-limit slave 256mb 64mb 60

client-output-buffer-limit pubsub 32mb 8mb 60

hz 10

aof-rewrite-incremental-fsync yes

# Generated by CONFIG REWRITE

#配置主节点信息

slaveof 192.168.71.214 6379

--检查修正

daemonize yes

pidfile "/home/redis/redisrun//redis_6379.pid"

logfile "/home/redis/redis.log"

【sentinel.conf】

选择A机房2节点作为sentinel

vi /home/redis/sentinel.conf

port 26379

#末尾的1是quorum:只要有1个sentinel判定redis主节点不可达,即可将其标记为客观下线;但真正执行切换还需获得多数sentinel的授权(本例共2个sentinel,需2个都存活),所以单sentinel节点无效(自己测试发现的)

#如果3s内mymaster无响应,则认为mymaster宕机了

#如果10秒后,mymaster仍没活过来,则启动failover

sentinel monitor mymaster 192.168.71.214 6379 1

sentinel down-after-milliseconds mymaster 3000

sentinel failover-timeout mymaster 10000

daemonize yes

#指定工作目录

dir "/home/redis/sentinel-work"

protected-mode no

logfile "/home/redis/sentinellog/sentinel.log"

#redis主节点密码

sentinel auth-pass mymaster 123456

# Generated by CONFIG REWRITE

--检查修正

sentinel monitor mymaster 192.168.71.214 6379 1

dir "/home/redis/sentinel-work"

logfile "/home/redis/sentinellog/sentinel.log"

启动检查

【启动集群与日志监控】

每个节点都执行

cd /home/redis/src/

./redis-server /home/redis/redis.conf

tail -f /home/redis/redis.log

只在sentinel节点执行

cd /home/redis/src/

./redis-sentinel /home/redis/sentinel.conf

tail -f /home/redis/sentinellog/sentinel.log

【Master检查】

cd /home/redis/src/

[root@localhost src]# ./redis-cli -h 192.168.70.214 -p 6379 -a 123456

192.168.70.214:6379> info Replication

# Replication

role:master

connected_slaves:3

slave0:ip=192.168.71.213,port=6379,state=online,offset=1107595,lag=1

slave1:ip=192.168.70.214,port=6379,state=online,offset=1107742,lag=0

slave2:ip=192.168.70.215,port=6379,state=online,offset=1107889,lag=0

master_repl_offset:1107889

repl_backlog_active:1

repl_backlog_size:1048576

repl_backlog_first_byte_offset:59314

repl_backlog_histlen:1048576

192.168.70.214:6379> set test zgy

OK

192.168.70.214:6379> get test

"zgy"

192.168.70.214:6379>

【Slave检查,只读】

192.168.71.214:6379> get test

"zgy"

192.168.71.214:6379> set test zgy2

(error) READONLY You can't write against a read only slave.

192.168.71.214:6379> info Replication

# Replication

role:slave

master_host:192.168.70.214

master_port:6379

master_link_status:up

master_last_io_seconds_ago:1

master_sync_in_progress:0

slave_repl_offset:42385

slave_priority:100

slave_read_only:1

connected_slaves:0

master_repl_offset:0

repl_backlog_active:0

repl_backlog_size:1048576

repl_backlog_first_byte_offset:0

repl_backlog_histlen:0

192.168.71.214:6379>

断网断电测试

断网

通过开启防火墙来模拟

service iptables status

--service iptables start

--70网段2节点的防火墙配置

[root@localhost redis]# cat /etc/sysconfig/iptables

# Firewall configuration written by system-config-firewall

# Manual customization of this file is not recommended.

*filter

:INPUT ACCEPT [0:0]

:FORWARD ACCEPT [0:0]

:OUTPUT ACCEPT [0:0]

#屏蔽A机房2个节点

-I INPUT -s 192.168.71.213 -j DROP

-I INPUT -s 192.168.71.214 -j DROP

-A INPUT -m state --state ESTABLISHED,RELATED -j ACCEPT

-A INPUT -p icmp -j ACCEPT

-A INPUT -i lo -j ACCEPT

-A INPUT -m state --state NEW -m tcp -p tcp --dport 22 -j ACCEPT

-A INPUT -j REJECT --reject-with icmp-host-prohibited

-A FORWARD -j REJECT --reject-with icmp-host-prohibited

COMMIT

断网

B机房断网前

--前

192.168.71.214:6379> info Replication

# Replication

role:master

connected_slaves:3

slave0:ip=192.168.71.213,port=6379,state=online,offset=12825868,lag=1

slave1:ip=192.168.70.214,port=6379,state=online,offset=12825868,lag=1

slave2:ip=192.168.70.215,port=6379,state=online,offset=12826015,lag=0

master_repl_offset:12826162

repl_backlog_active:1

repl_backlog_size:1048576

repl_backlog_first_byte_offset:11777587

repl_backlog_histlen:1048576

192.168.71.214:6379>

--后

--明显找不到70网段的那2个节点啦

192.168.71.214:6379> info Replication

# Replication

role:master

connected_slaves:1

slave0:ip=192.168.71.213,port=6379,state=online,offset=12909588,lag=1

master_repl_offset:12909588

repl_backlog_active:1

repl_backlog_size:1048576

repl_backlog_first_byte_offset:11861013

repl_backlog_histlen:1048576

192.168.71.214:6379>

而Master还能继续对外提供服务

A机房断网前、后

192.168.71.214:6379> info Replication

# Replication

role:master

connected_slaves:3

slave0:ip=192.168.71.213,port=6379,state=online,offset=12942691,lag=1

slave1:ip=192.168.70.214,port=6379,state=online,offset=12942691,lag=1

slave2:ip=192.168.70.215,port=6379,state=online,offset=12942838,lag=0

master_repl_offset:12942838

repl_backlog_active:1

repl_backlog_size:1048576

repl_backlog_first_byte_offset:11894263

repl_backlog_histlen:1048576

后,出现2个Master??

192.168.71.214:6379> info Replication

# Replication

role:master

connected_slaves:0

master_repl_offset:12957363

repl_backlog_active:1

repl_backlog_size:1048576

repl_backlog_first_byte_offset:11908788

repl_backlog_histlen:1048576

192.168.71.214:6379>

192.168.71.213:6379> info replication

# Replication

role:master

connected_slaves:0

master_repl_offset:12943881

repl_backlog_active:0

repl_backlog_size:1048576

repl_backlog_first_byte_offset:0

repl_backlog_histlen:0

192.168.71.213:6379>

断电

通过kill redis进程来模拟

ps -ef|grep redis

断电前

192.168.71.213:6379> info replication

# Replication

role:master

connected_slaves:3

slave0:ip=192.168.70.215,port=6379,state=online,offset=13091227,lag=0

slave1:ip=192.168.70.214,port=6379,state=online,offset=13091227,lag=0

slave2:ip=192.168.71.214,port=6379,state=online,offset=13091080,lag=1

master_repl_offset:13091227

repl_backlog_active:1

repl_backlog_size:1048576

repl_backlog_first_byte_offset:13087442

repl_backlog_histlen:3786

192.168.71.214:6379> info Replication

# Replication

role:master

connected_slaves:1

slave0:ip=192.168.71.213,port=6379,state=online,offset=13096642,lag=1

master_repl_offset:13096642

repl_backlog_active:1

repl_backlog_size:1048576

repl_backlog_first_byte_offset:13092272

repl_backlog_histlen:4371

192.168.71.214:6379>

断电后

192.168.70.214:6379> info Replication

# Replication

role:slave

master_host:192.168.71.214

master_port:6379

master_link_status:down

master_last_io_seconds_ago:-1

master_sync_in_progress:0

slave_repl_offset:13159324

master_link_down_since_seconds:18

slave_priority:100

slave_read_only:1

connected_slaves:0

master_repl_offset:0

repl_backlog_active:0

repl_backlog_size:1048576

repl_backlog_first_byte_offset:0

repl_backlog_histlen:0

192.168.70.215:6379> info Replication

# Replication

role:slave

master_host:192.168.71.214

master_port:6379

master_link_status:down

master_last_io_seconds_ago:-1

master_sync_in_progress:0

slave_repl_offset:13159324

master_link_down_since_seconds:28

slave_priority:100

slave_read_only:1

connected_slaves:0

master_repl_offset:0

repl_backlog_active:0

repl_backlog_size:1048576

repl_backlog_first_byte_offset:0

repl_backlog_histlen:0

70网段的节点仍然都是Slave(只读且与Master断开),无法正常提供服务了……

此时,需要修改其中一个节点的配置来向外提供服务

先Kill掉redis进程,再修改其中一个节点(如70.215)的redis参数slaveof,使其指向另一个节点(如70.214);并检查被指向的那一台(70.214),删除其配置中的slaveof这一项;最后重启这2个节点,即可对外正常提供服务

vi /home/redis/redis.conf

slaveof 192.168.70.214 6379

[root@localhost src]# ./redis-cli -h 192.168.70.214 -p 6379 -a 123456

192.168.70.214:6379> info Replication

# Replication

role:master

connected_slaves:1

slave0:ip=192.168.70.215,port=6379,state=online,offset=15,lag=1

master_repl_offset:15

repl_backlog_active:1

repl_backlog_size:1048576

repl_backlog_first_byte_offset:2

repl_backlog_histlen:14

192.168.70.214:6379>

【还原初始】

修改71.214之外其余节点的参数

vi /home/redis/redis.conf

slaveof 192.168.71.214 6379

vi /home/redis/sentinel.conf

sentinel monitor mymaster 192.168.71.214 6379 1

并删除sentinel.conf的最后几行(应为sentinel运行时自动追加在“# Generated by CONFIG REWRITE”之后的内容)

数据校验

Master执行更新数据会同步Slave

注意事项

见每步后面


以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持 码农网

查看所有标签

猜你喜欢:

本站部分资源来源于网络,本站转载出于传递更多信息之目的,版权归原作者或者来源机构所有,如转载稿涉及版权问题,请联系我们

产品经理手册(原书第4版)(白金版)

产品经理手册(原书第4版)(白金版)

[美] 琳达·哥乔斯(Linda Gorchels) / 祝亚雄、冯华丽、金骆彬 / 机械工业出版社 / 2017-8 / 65.00

产品经理的职责起点是新产品开发,贯穿产品生命周期的全过程。本书按上下游产品管理进行组织。 在上游的新产品开发流程中,作者阐述了如何从市场、产品、行业、公司的角度规划企划方案,并获得老板、销售部、运营部的资源支持,推进新产品的项目流程,实现所有目标,制定和实施新产品发布。 下游产品的管理核心在于生命周期的管理,营销更是生命周期管理的重中之重。产品经理如何让产品满足客户需求,让客户获得对产......一起来看看 《产品经理手册(原书第4版)(白金版)》 这本书的介绍吧!

JS 压缩/解压工具
JS 压缩/解压工具

在线压缩/解压 JS 代码

RGB转16进制工具
RGB转16进制工具

RGB HEX 互转工具

正则表达式在线测试
正则表达式在线测试

正则表达式在线测试