Redis Sentinel集群双机房容灾实施步骤

栏目: 数据库 · 发布时间: 7年前

内容简介:本文介绍在双机房部署的情况下,当任一个机房完全无法提供服务时,如何让Redis继续提供服务,内容包括概要目标、架构设计等。

概要目标

在双机房情况下,当任一个机房完全无法提供服务时,保证 Redis 仍能继续提供服务。

架构设计

A、B两机房,其中A机房有一Master一Slave和两个Sentinel,B机房只有2个Slave,如下图。

初始规划

A机房

192.168.71.213 S+哨兵

192.168.71.214 M+哨兵

B机房

192.168.70.214 S

192.168.70.215 S

目录创建

--redis软件目录

mkdir -p /home/redis

--pidfile文件存放目录

mkdir -p /home/redis/redisrun/

解压redis安装包到 /home/redis

集群配置

【Master】

选择71.214作为Master

[root@node-71 redis]# vi /home/redis/redis.conf

#后台启动

daemonize yes

pidfile "/home/redis/redisrun/redis_6379.pid"

port 6379

timeout 0

tcp-keepalive 0

loglevel notice

logfile "/home/redis/redis.log"

databases 16

save 900 1

save 300 10

save 60 10000

stop-writes-on-bgsave-error yes

rdbcompression yes

rdbchecksum yes

dbfilename "dump.rdb"

dir "/home/redis/redisdb"

#如果做故障切换,不论主从节点都要填写密码且要保持一致

masterauth "123456"

slave-serve-stale-data yes

slave-read-only yes

repl-disable-tcp-nodelay no

slave-priority 98

#当前redis密码

requirepass "123456"

appendonly yes

# appendfsync always

appendfsync everysec

# appendfsync no

no-appendfsync-on-rewrite no

auto-aof-rewrite-percentage 100

auto-aof-rewrite-min-size 64mb

lua-time-limit 5000

slowlog-log-slower-than 10000

slowlog-max-len 128

notify-keyspace-events ""

hash-max-ziplist-entries 512

hash-max-ziplist-value 64

list-max-ziplist-entries 512

list-max-ziplist-value 64

set-max-intset-entries 512

zset-max-ziplist-entries 128

zset-max-ziplist-value 64

activerehashing yes

client-output-buffer-limit normal 0 0 0

client-output-buffer-limit slave 256mb 64mb 60

client-output-buffer-limit pubsub 32mb 8mb 60

hz 10

aof-rewrite-incremental-fsync yes

# Generated by CONFIG REWRITE

【Slave】

选择其余3个节点作为Slave

[root@node-71 redis]# vi /home/redis/redis.conf

daemonize yes

pidfile "/home/redis/redisrun/redis_6379.pid"

port 6379

timeout 0

tcp-keepalive 0

loglevel notice

logfile "/home/redis/redis.log"

databases 16

save 900 1

save 300 10

save 60 10000

stop-writes-on-bgsave-error yes

rdbcompression yes

rdbchecksum yes

dbfilename "dump.rdb"

dir "/home/redis/redisdb"

#主节点密码

masterauth "123456"

slave-serve-stale-data yes

slave-read-only yes

repl-disable-tcp-nodelay no

slave-priority 98

requirepass "123456"

appendonly yes

# appendfsync always

appendfsync everysec

# appendfsync no

no-appendfsync-on-rewrite no

auto-aof-rewrite-percentage 100

auto-aof-rewrite-min-size 64mb

lua-time-limit 5000

slowlog-log-slower-than 10000

slowlog-max-len 128

notify-keyspace-events ""

hash-max-ziplist-entries 512

hash-max-ziplist-value 64

list-max-ziplist-entries 512

list-max-ziplist-value 64

set-max-intset-entries 512

zset-max-ziplist-entries 128

zset-max-ziplist-value 64

activerehashing yes

client-output-buffer-limit normal 0 0 0

client-output-buffer-limit slave 256mb 64mb 60

client-output-buffer-limit pubsub 32mb 8mb 60

hz 10

aof-rewrite-incremental-fsync yes

# Generated by CONFIG REWRITE

#配置主节点信息

slaveof 192.168.71.214 6379

--检查修正

daemonize yes

pidfile "/home/redis/redisrun/redis_6379.pid"

logfile "/home/redis/redis.log"

【sentinel.conf】

选择A机房2节点作为sentinel

vi /home/redis/sentinel.conf

port 26379

#末尾的1是quorum,表示sentinel集群中只要有1个节点检测到redis主节点出故障即可判定其客观下线;但真正执行切换还需要多数sentinel节点授权,单sentinel节点无效(自己测试发现的)

#如果3s内mymaster无响应,则认为mymaster宕机了

#如果10秒后,mymaster仍没活过来,则启动failover

sentinel monitor mymaster 192.168.71.214 6379 1

sentinel down-after-milliseconds mymaster 3000

sentinel failover-timeout mymaster 10000

daemonize yes

#指定工作目录

dir "/home/redis/sentinel-work"

protected-mode no

logfile "/home/redis/sentinellog/sentinel.log"

#redis主节点密码

sentinel auth-pass mymaster 123456

# Generated by CONFIG REWRITE

--检查修正

sentinel monitor mymaster 192.168.71.214 6379 1

dir "/home/redis/sentinel-work"

logfile "/home/redis/sentinellog/sentinel.log"

启动检查

【启动集群与日志监控】

每个节点都执行

cd /home/redis/src/

./redis-server /home/redis/redis.conf

tail -f /home/redis/redis.log

只在sentinel节点执行

cd /home/redis/src/

./redis-sentinel /home/redis/sentinel.conf

tail -f /home/redis/sentinellog/sentinel.log

【Master检查】

cd /home/redis/src/

[root@localhost src]# ./redis-cli -h 192.168.70.214 -p 6379 -a 123456

192.168.70.214:6379> info Replication

# Replication

role:master

connected_slaves:3

slave0:ip=192.168.71.213,port=6379,state=online,offset=1107595,lag=1

slave1:ip=192.168.71.214,port=6379,state=online,offset=1107742,lag=0

slave2:ip=192.168.70.215,port=6379,state=online,offset=1107889,lag=0

master_repl_offset:1107889

repl_backlog_active:1

repl_backlog_size:1048576

repl_backlog_first_byte_offset:59314

repl_backlog_histlen:1048576

192.168.70.214:6379> set test zgy

OK

192.168.70.214:6379> get test

"zgy"

192.168.70.214:6379>

【Slave检查,只读】

192.168.71.214:6379> get test

"zgy"

192.168.71.214:6379> set test zgy2

(error) READONLY You can't write against a read only slave.

192.168.71.214:6379> info Replication

# Replication

role:slave

master_host:192.168.70.214

master_port:6379

master_link_status:up

master_last_io_seconds_ago:1

master_sync_in_progress:0

slave_repl_offset:42385

slave_priority:100

slave_read_only:1

connected_slaves:0

master_repl_offset:0

repl_backlog_active:0

repl_backlog_size:1048576

repl_backlog_first_byte_offset:0

repl_backlog_histlen:0

192.168.71.214:6379>

断网断电测试

断网

通过开启防火墙来模拟

service iptables status

--service iptables start

--70网段2节点的防火墙配置

[root@localhost redis]# cat /etc/sysconfig/iptables

# Firewall configuration written by system-config-firewall

# Manual customization of this file is not recommended.

*filter

:INPUT ACCEPT [0:0]

:FORWARD ACCEPT [0:0]

:OUTPUT ACCEPT [0:0]

#屏蔽A机房2个节点

-I INPUT -s 192.168.71.213 -j DROP

-I INPUT -s 192.168.71.214 -j DROP

-A INPUT -m state --state ESTABLISHED,RELATED -j ACCEPT

-A INPUT -p icmp -j ACCEPT

-A INPUT -i lo -j ACCEPT

-A INPUT -m state --state NEW -m tcp -p tcp --dport 22 -j ACCEPT

-A INPUT -j REJECT --reject-with icmp-host-prohibited

-A FORWARD -j REJECT --reject-with icmp-host-prohibited

COMMIT

断网

B机房断网前

--前

192.168.71.214:6379> info Replication

# Replication

role:master

connected_slaves:3

slave0:ip=192.168.71.213,port=6379,state=online,offset=12825868,lag=1

slave1:ip=192.168.70.214,port=6379,state=online,offset=12825868,lag=1

slave2:ip=192.168.70.215,port=6379,state=online,offset=12826015,lag=0

master_repl_offset:12826162

repl_backlog_active:1

repl_backlog_size:1048576

repl_backlog_first_byte_offset:11777587

repl_backlog_histlen:1048576

192.168.71.214:6379>

--后

--明显找不到70网段的那2个节点啦

192.168.71.214:6379> info Replication

# Replication

role:master

connected_slaves:1

slave0:ip=192.168.71.213,port=6379,state=online,offset=12909588,lag=1

master_repl_offset:12909588

repl_backlog_active:1

repl_backlog_size:1048576

repl_backlog_first_byte_offset:11861013

repl_backlog_histlen:1048576

192.168.71.214:6379>

而Master还能继续对外提供服务

A机房断网前、后

192.168.71.214:6379> info Replication

# Replication

role:master

connected_slaves:3

slave0:ip=192.168.71.213,port=6379,state=online,offset=12942691,lag=1

slave1:ip=192.168.70.214,port=6379,state=online,offset=12942691,lag=1

slave2:ip=192.168.70.215,port=6379,state=online,offset=12942838,lag=0

master_repl_offset:12942838

repl_backlog_active:1

repl_backlog_size:1048576

repl_backlog_first_byte_offset:11894263

repl_backlog_histlen:1048576

后,出现2个Master??

192.168.71.214:6379> info Replication

# Replication

role:master

connected_slaves:0

master_repl_offset:12957363

repl_backlog_active:1

repl_backlog_size:1048576

repl_backlog_first_byte_offset:11908788

repl_backlog_histlen:1048576

192.168.71.214:6379>

192.168.71.213:6379> info replication

# Replication

role:master

connected_slaves:0

master_repl_offset:12943881

repl_backlog_active:0

repl_backlog_size:1048576

repl_backlog_first_byte_offset:0

repl_backlog_histlen:0

192.168.71.213:6379>

断电

通过kill redis进程来模拟

ps -ef|grep redis

断电前

192.168.71.213:6379> info replication

# Replication

role:master

connected_slaves:3

slave0:ip=192.168.70.215,port=6379,state=online,offset=13091227,lag=0

slave1:ip=192.168.70.214,port=6379,state=online,offset=13091227,lag=0

slave2:ip=192.168.71.214,port=6379,state=online,offset=13091080,lag=1

master_repl_offset:13091227

repl_backlog_active:1

repl_backlog_size:1048576

repl_backlog_first_byte_offset:13087442

repl_backlog_histlen:3786

192.168.71.214:6379> info Replication

# Replication

role:master

connected_slaves:1

slave0:ip=192.168.71.213,port=6379,state=online,offset=13096642,lag=1

master_repl_offset:13096642

repl_backlog_active:1

repl_backlog_size:1048576

repl_backlog_first_byte_offset:13092272

repl_backlog_histlen:4371

192.168.71.214:6379>

断电后

192.168.70.214:6379> info Replication

# Replication

role:slave

master_host:192.168.71.214

master_port:6379

master_link_status:down

master_last_io_seconds_ago:-1

master_sync_in_progress:0

slave_repl_offset:13159324

master_link_down_since_seconds:18

slave_priority:100

slave_read_only:1

connected_slaves:0

master_repl_offset:0

repl_backlog_active:0

repl_backlog_size:1048576

repl_backlog_first_byte_offset:0

repl_backlog_histlen:0

192.168.70.215:6379> info Replication

# Replication

role:slave

master_host:192.168.71.214

master_port:6379

master_link_status:down

master_last_io_seconds_ago:-1

master_sync_in_progress:0

slave_repl_offset:13159324

master_link_down_since_seconds:28

slave_priority:100

slave_read_only:1

connected_slaves:0

master_repl_offset:0

repl_backlog_active:0

repl_backlog_size:1048576

repl_backlog_first_byte_offset:0

repl_backlog_histlen:0

70网段的节点都仍是Slave(Sentinel全部部署在A机房,A机房故障后没有Sentinel可以发起故障切换),无法正常提供服务了。。。

此时,需要修改其中一个节点的配置来向外提供服务

先Kill掉redis进程,再修改其中一个节点(如70.215)的redis参数,使slaveof指向另一个节点(70.214);并检查另外一台(70.214),删除slaveof这一项,使其成为Master;最后重启2个节点,对外正常提供服务

vi /home/redis/redis.conf

slaveof 192.168.70.214 6379

[root@localhost src]# ./redis-cli -h 192.168.70.214 -p 6379 -a 123456

192.168.70.214:6379> info Replication

# Replication

role:master

connected_slaves:1

slave0:ip=192.168.70.215,port=6379,state=online,offset=15,lag=1

master_repl_offset:15

repl_backlog_active:1

repl_backlog_size:1048576

repl_backlog_first_byte_offset:2

repl_backlog_histlen:14

192.168.70.214:6379>

【还原初始】

修改71.214之外各节点的参数

vi /home/redis/redis.conf

slaveof 192.168.71.214 6379

vi /home/redis/sentinel.conf

sentinel monitor mymaster 192.168.71.214 6379 1

并删除文件最后几行(即 "# Generated by CONFIG REWRITE" 之后由sentinel自动生成的内容)

数据校验

Master执行更新数据会同步Slave

注意事项

见每步后面


以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持 码农网

查看所有标签

猜你喜欢:

本站部分资源来源于网络,本站转载出于传递更多信息之目的,版权归原作者或者来源机构所有,如转载稿涉及版权问题,请联系我们

We Are the Nerds

We Are the Nerds

Christine Lagorio-Chafkin / Hachette Books / 2018-10-2 / USD 18.30

Reddit hails itself as "the front page of the Internet." It's the third most-visited website in the United States--and yet, millions of Americans have no idea what it is. We Are the Nerds is an eng......一起来看看 《We Are the Nerds》 这本书的介绍吧!

图片转BASE64编码
图片转BASE64编码

在线图片转Base64编码工具

XML 在线格式化
XML 在线格式化

在线 XML 格式化压缩工具

正则表达式在线测试
正则表达式在线测试

正则表达式在线测试