MongoDB中怎么切换副本集故障
MongoDB中怎么切换副本集故障,很多新手对此不是很清楚,为了帮助大家解决这个难题,下面小编将为大家详细讲解,有这方面需求的人可以来学习下,希望你能有所收获。
网站建设哪家好,找成都创新互联!专注于网页设计、网站建设、微信开发、小程序定制开发、集团企业网站建设等服务项目。为回馈新老客户创新互联还提供了平坝免费建站欢迎大家使用!
默认情况下主节点和从节点的优先级都为1,仲裁者的优先级为0,因为它只参与投票(votes为1),不能被选举为主节点。
查看集群配置
cjcmonset:PRIMARY> rs.conf()
{
"_id" : "cjcmonset",
"version" : 1,
"protocolVersion" : NumberLong(1),
"writeConcernMajorityJournalDefault" : true,
"members" : [
{
"_id" : 0,
"host" : "192.168.2.222:27017",
"arbiterOnly" : false,
"buildIndexes" : true,
"hidden" : false,
"priority" : 1,
"tags" : {
},
"slaveDelay" : NumberLong(0),
"votes" : 1
},
{
"_id" : 1,
"host" : "192.168.2.187:27017",
"arbiterOnly" : false,
"buildIndexes" : true,
"hidden" : false,
"priority" : 1,
"tags" : {
},
"slaveDelay" : NumberLong(0),
"votes" : 1
},
{
"_id" : 2,
"host" : "192.168.2.188:27017",
"arbiterOnly" : true,
"buildIndexes" : true,
"hidden" : false,
"priority" : 0,
"tags" : {
},
"slaveDelay" : NumberLong(0),
"votes" : 1
}
],
"settings" : {
"chainingAllowed" : true,
"heartbeatIntervalMillis" : 2000,
"heartbeatTimeoutSecs" : 10,
"electionTimeoutMillis" : 10000,
"catchUpTimeoutMillis" : -1,
"catchUpTakeoverDelayMillis" : 30000,
"getLastErrorModes" : {
},
"getLastErrorDefaults" : {
"w" : 1,
"wtimeout" : 0
},
"replicaSetId" : ObjectId("5e77148837ae69b4ab9b4870")
}
}
我将现有主节点2.222的优先级提高为5,目的是在主库故障恢复后可以自动将主库角色切换回来。
cjcmonset:PRIMARY> var rscfg=rs.conf()
cjcmonset:PRIMARY> rscfg.members[0].priority = 5
5
cjcmonset:PRIMARY> rs.reconfig(rscfg)
{
"ok" : 1,
"$clusterTime" : {
"clusterTime" : Timestamp(1584881617, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
},
"operationTime" : Timestamp(1584881617, 1)
}
查看一下状态,主库优先级别已经调到5
cjcmonset:PRIMARY> rs.conf()
{
"_id" : "cjcmonset",
"version" : 2,
"protocolVersion" : NumberLong(1),
"writeConcernMajorityJournalDefault" : true,
"members" : [
{
"_id" : 0,
"host" : "192.168.2.222:27017",
"arbiterOnly" : false,
"buildIndexes" : true,
"hidden" : false,
"priority" : 5,
"tags" : {
},
"slaveDelay" : NumberLong(0),
"votes" : 1
},
{
"_id" : 1,
"host" : "192.168.2.187:27017",
"arbiterOnly" : false,
"buildIndexes" : true,
"hidden" : false,
"priority" : 1,
"tags" : {
},
"slaveDelay" : NumberLong(0),
"votes" : 1
},
{
"_id" : 2,
"host" : "192.168.2.188:27017",
"arbiterOnly" : true,
"buildIndexes" : true,
"hidden" : false,
"priority" : 0,
"tags" : {
},
"slaveDelay" : NumberLong(0),
"votes" : 1
}
],
"settings" : {
"chainingAllowed" : true,
"heartbeatIntervalMillis" : 2000,
"heartbeatTimeoutSecs" : 10,
"electionTimeoutMillis" : 10000,
"catchUpTimeoutMillis" : -1,
"catchUpTakeoverDelayMillis" : 30000,
"getLastErrorModes" : {
},
"getLastErrorDefaults" : {
"w" : 1,
"wtimeout" : 0
},
"replicaSetId" : ObjectId("5e77148837ae69b4ab9b4870")
}
}
手动将主节点(2.222)的mongod停掉,测试故障转移功能
cjcmonset:PRIMARY> use admin
switched to db admin
cjcmonset:PRIMARY> db.shutdownServer()
2020-03-22T20:59:39.419+0800 I NETWORK [js] DBClientConnection failed to receive message from 127.0.0.1:27017 - HostUnreachable: Connection closed by peer
server should be down...
2020-03-22T20:59:39.422+0800 I NETWORK [js] trying reconnect to 127.0.0.1:27017 failed
2020-03-22T20:59:39.423+0800 I NETWORK [js] reconnect 127.0.0.1:27017 failed failed
在2.187节点查看集群状态,原主库2.222提示Connection refused,原从库2.187已经自动切换成主库。
查看集群状态
cjcmonset:PRIMARY> rs.status()
{
"set" : "cjcmonset",
"date" : ISODate("2020-03-22T13:00:33.838Z"),
"myState" : 1,
"term" : NumberLong(2),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"heartbeatIntervalMillis" : NumberLong(2000),
"majorityVoteCount" : 2,
"writeMajorityCount" : 2,
"optimes" : {
"lastCommittedOpTime" : {
"ts" : Timestamp(1584881969, 1),
"t" : NumberLong(1)
},
"lastCommittedWallTime" : ISODate("2020-03-22T12:59:29.481Z"),
"readConcernMajorityOpTime" : {
"ts" : Timestamp(1584881969, 1),
"t" : NumberLong(1)
},
"readConcernMajorityWallTime" : ISODate("2020-03-22T12:59:29.481Z"),
"appliedOpTime" : {
"ts" : Timestamp(1584882028, 1),
"t" : NumberLong(2)
},
"durableOpTime" : {
"ts" : Timestamp(1584882028, 1),
"t" : NumberLong(2)
},
"lastAppliedWallTime" : ISODate("2020-03-22T13:00:28.344Z"),
"lastDurableWallTime" : ISODate("2020-03-22T13:00:28.344Z")
},
"lastStableRecoveryTimestamp" : Timestamp(1584881969, 1),
"lastStableCheckpointTimestamp" : Timestamp(1584881969, 1),
"electionCandidateMetrics" : {
"lastElectionReason" : "stepUpRequestSkipDryRun",
"lastElectionDate" : ISODate("2020-03-22T12:59:37.752Z"),
"electionTerm" : NumberLong(2),
"lastCommittedOpTimeAtElection" : {
"ts" : Timestamp(1584881969, 1),
"t" : NumberLong(1)
},
"lastSeenOpTimeAtElection" : {
"ts" : Timestamp(1584881969, 1),
"t" : NumberLong(1)
},
"numVotesNeeded" : 2,
"priorityAtElection" : 1,
"electionTimeoutMillis" : NumberLong(10000),
"priorPrimaryMemberId" : 0,
"numCatchUpOps" : NumberLong(0),
"newTermStartDate" : ISODate("2020-03-22T12:59:38.313Z")
},
"electionParticipantMetrics" : {
"votedForCandidate" : true,
"electionTerm" : NumberLong(1),
"lastVoteDate" : ISODate("2020-03-22T07:32:34.460Z"),
"electionCandidateMemberId" : 0,
"voteReason" : "",
"lastAppliedOpTimeAtElection" : {
"ts" : Timestamp(1584862345, 1),
"t" : NumberLong(-1)
},
"maxAppliedOpTimeInSet" : {
"ts" : Timestamp(1584862345, 1),
"t" : NumberLong(-1)
},
"priorityAtElection" : 1
},
"members" : [
{
"_id" : 0,
"name" : "192.168.2.222:27017",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)",
"uptime" : 0,
"optime" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDurable" : {
"ts" : Timestamp(0, 0),
"t" : NumberLong(-1)
},
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"optimeDurableDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2020-03-22T13:00:31.874Z"),
"lastHeartbeatRecv" : ISODate("2020-03-22T12:59:36.547Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "Error connecting to 192.168.2.222:27017 :: caused by :: Connection refused",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : -1
},
{
"_id" : 1,
"name" : "192.168.2.187:27017",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 19745,
"optime" : {
"ts" : Timestamp(1584882028, 1),
"t" : NumberLong(2)
},
"optimeDate" : ISODate("2020-03-22T13:00:28Z"),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"electionTime" : Timestamp(1584881977, 1),
"electionDate" : ISODate("2020-03-22T12:59:37Z"),
"configVersion" : 2,
"self" : true,
"lastHeartbeatMessage" : ""
},
{
"_id" : 2,
"name" : "192.168.2.188:27017",
"health" : 1,
"state" : 7,
"stateStr" : "ARBITER",
"uptime" : 19689,
"lastHeartbeat" : ISODate("2020-03-22T13:00:31.872Z"),
"lastHeartbeatRecv" : ISODate("2020-03-22T13:00:32.657Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : 2
}
],
"ok" : 1,
"$clusterTime" : {
"clusterTime" : Timestamp(1584882028, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
},
"operationTime" : Timestamp(1584882028, 1)
}
手动启动2.222节点MongoDB
[root@cjcos conf]# mongod --config /usr/local/mongodb/conf/mongodb.conf
cjcmonset:SECONDARY> rs.status()
{
"set" : "cjcmonset",
"date" : ISODate("2020-03-22T13:02:32.499Z"),
"myState" : 2,
"term" : NumberLong(2),
"syncingTo" : "192.168.2.187:27017",
"syncSourceHost" : "192.168.2.187:27017",
"syncSourceId" : 1,
"heartbeatIntervalMillis" : NumberLong(2000),
"majorityVoteCount" : 2,
"writeMajorityCount" : 2,
"optimes" : {
"lastCommittedOpTime" : {
"ts" : Timestamp(1584882148, 1),
"t" : NumberLong(2)
},
"lastCommittedWallTime" : ISODate("2020-03-22T13:02:28.367Z"),
"readConcernMajorityOpTime" : {
"ts" : Timestamp(1584882148, 1),
"t" : NumberLong(2)
},
"readConcernMajorityWallTime" : ISODate("2020-03-22T13:02:28.367Z"),
"appliedOpTime" : {
"ts" : Timestamp(1584882148, 1),
"t" : NumberLong(2)
},
"durableOpTime" : {
"ts" : Timestamp(1584882148, 1),
"t" : NumberLong(2)
},
"lastAppliedWallTime" : ISODate("2020-03-22T13:02:28.367Z"),
"lastDurableWallTime" : ISODate("2020-03-22T13:02:28.367Z")
},
"lastStableRecoveryTimestamp" : Timestamp(1584881969, 1),
"lastStableCheckpointTimestamp" : Timestamp(1584881969, 1),
"members" : [
{
"_id" : 0,
"name" : "192.168.2.222:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 13,
"optime" : {
"ts" : Timestamp(1584882148, 1),
"t" : NumberLong(2)
},
"optimeDate" : ISODate("2020-03-22T13:02:28Z"),
"syncingTo" : "192.168.2.187:27017",
"syncSourceHost" : "192.168.2.187:27017",
"syncSourceId" : 1,
"infoMessage" : "",
"configVersion" : 2,
"self" : true,
"lastHeartbeatMessage" : ""
},
{
"_id" : 1,
"name" : "192.168.2.187:27017",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 10,
"optime" : {
"ts" : Timestamp(1584882148, 1),
"t" : NumberLong(2)
},
"optimeDurable" : {
"ts" : Timestamp(1584882148, 1),
"t" : NumberLong(2)
},
"optimeDate" : ISODate("2020-03-22T13:02:28Z"),
"optimeDurableDate" : ISODate("2020-03-22T13:02:28Z"),
"lastHeartbeat" : ISODate("2020-03-22T13:02:31.498Z"),
"lastHeartbeatRecv" : ISODate("2020-03-22T13:02:31.261Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"electionTime" : Timestamp(1584881977, 1),
"electionDate" : ISODate("2020-03-22T12:59:37Z"),
"configVersion" : 2
},
{
"_id" : 2,
"name" : "192.168.2.188:27017",
"health" : 1,
"state" : 7,
"stateStr" : "ARBITER",
"uptime" : 10,
"lastHeartbeat" : ISODate("2020-03-22T13:02:31.496Z"),
"lastHeartbeatRecv" : ISODate("2020-03-22T13:02:32.014Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : 2
}
],
"ok" : 1,
"$clusterTime" : {
"clusterTime" : Timestamp(1584882148, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
},
"operationTime" : Timestamp(1584882148, 1)
}
因为设置了优先级,所以在启动2.222节点mongo后,很快2.222节点又会被重新选举为主节点,而2.187节点又变成从节点。
cjcmonset:SECONDARY>
cjcmonset:PRIMARY>
cjcmonset:PRIMARY>
cjcmonset:PRIMARY>
---2.222日志如下:
2020-03-22T21:02:33.946+0800 I CONNPOOL [Replication] Connecting to 192.168.2.187:27017
2020-03-22T21:02:33.949+0800 I REPL [replexec-0] Member 192.168.2.187:27017 is now in state SECONDARY
2020-03-22T21:02:33.949+0800 I REPL [replexec-0] Caught up to the latest optime known via heartbeats after becoming primary. Target optime: { ts: Timestamp(1584882148, 1), t: 2 }. My Last Applied: { ts: Timestamp(1584882148, 1), t: 2 }
2020-03-22T21:02:33.949+0800 I REPL [replexec-0] Exited primary catch-up mode.
2020-03-22T21:02:33.949+0800 I REPL [replexec-0] Stopping replication producer
2020-03-22T21:02:33.949+0800 I REPL [rsBackgroundSync] Replication producer stopped after oplog fetcher finished returning a batch from our sync source. Abandoning this batch of oplog entries and re-evaluating our sync source.
2020-03-22T21:02:34.592+0800 I REPL [ReplBatcher] Oplog buffer has been drained in term 3
2020-03-22T21:02:34.592+0800 I REPL [RstlKillOpThread] Starting to kill user operations
2020-03-22T21:02:34.592+0800 I REPL [RstlKillOpThread] Stopped killing user operations
2020-03-22T21:02:34.592+0800 I REPL [RstlKillOpThread] State transition ops metrics: { lastStateTransition: "stepUp", userOpsKilled: 0, userOpsRunning: 0 }
2020-03-22T21:02:34.593+0800 I REPL [rsSync-0] transition to primary complete; database writes are now permitted
2020-03-22T21:02:34.712+0800 I REPL [SyncSourceFeedback] SyncSourceFeedback error sending update to 192.168.2.187:27017: InvalidSyncSource: Sync source was cleared. Was 192.168.2.187:27017
2020-03-22T21:02:35.459+0800 I NETWORK [listener] connection accepted from 192.168.2.187:41810 #13 (6 connections now open)
2020-03-22T21:02:35.460+0800 I NETWORK [conn13] received client metadata from 192.168.2.187:41810 conn13: { driver: { name: "NetworkInterfaceTL", version: "4.2.3" }, os: { type: "Linux", name: "CentOS Linux release 7.5.1804 (Core) ", architecture: "x86_64", version: "Kernel 3.10.0-862.el7.x86_64" } }
2020-03-22T21:02:39.711+0800 I CONNPOOL [RS] Ending connection to host 192.168.2.187:27017 due to bad connection status: CallbackCanceled: Callback was canceled; 1 connections to that host remain open
2020-03-22T21:02:43.944+0800 I CONNPOOL [Replication] Ending connection to host 192.168.2.187:27017 due to bad connection status: CallbackCanceled: Callback was canceled; 1 connections to that host remain open
查看集群状态,2.222又重新变成主节点
cjcmonset:PRIMARY> rs.status()
{
"set" : "cjcmonset",
"date" : ISODate("2020-03-22T13:04:24.678Z"),
"myState" : 1,
"term" : NumberLong(3),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"heartbeatIntervalMillis" : NumberLong(2000),
"majorityVoteCount" : 2,
"writeMajorityCount" : 2,
"optimes" : {
"lastCommittedOpTime" : {
"ts" : Timestamp(1584882264, 1),
"t" : NumberLong(3)
},
"lastCommittedWallTime" : ISODate("2020-03-22T13:04:24.632Z"),
"readConcernMajorityOpTime" : {
"ts" : Timestamp(1584882264, 1),
"t" : NumberLong(3)
},
"readConcernMajorityWallTime" : ISODate("2020-03-22T13:04:24.632Z"),
"appliedOpTime" : {
"ts" : Timestamp(1584882264, 1),
"t" : NumberLong(3)
},
"durableOpTime" : {
"ts" : Timestamp(1584882264, 1),
"t" : NumberLong(3)
},
"lastAppliedWallTime" : ISODate("2020-03-22T13:04:24.632Z"),
"lastDurableWallTime" : ISODate("2020-03-22T13:04:24.632Z")
},
"lastStableRecoveryTimestamp" : Timestamp(1584882254, 1),
"lastStableCheckpointTimestamp" : Timestamp(1584882254, 1),
"electionCandidateMetrics" : {
"lastElectionReason" : "priorityTakeover",
"lastElectionDate" : ISODate("2020-03-22T13:02:33.880Z"),
"electionTerm" : NumberLong(3),
"lastCommittedOpTimeAtElection" : {
"ts" : Timestamp(1584882148, 1),
"t" : NumberLong(2)
},
"lastSeenOpTimeAtElection" : {
"ts" : Timestamp(1584882148, 1),
"t" : NumberLong(2)
},
"numVotesNeeded" : 2,
"priorityAtElection" : 5,
"electionTimeoutMillis" : NumberLong(10000),
"priorPrimaryMemberId" : 1,
"numCatchUpOps" : NumberLong(0),
"newTermStartDate" : ISODate("2020-03-22T13:02:34.593Z"),
"wMajorityWriteAvailabilityDate" : ISODate("2020-03-22T13:02:35.462Z")
},
"members" : [
{
"_id" : 0,
"name" : "192.168.2.222:27017",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY",
"uptime" : 125,
"optime" : {
"ts" : Timestamp(1584882264, 1),
"t" : NumberLong(3)
},
"optimeDate" : ISODate("2020-03-22T13:04:24Z"),
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"electionTime" : Timestamp(1584882153, 1),
"electionDate" : ISODate("2020-03-22T13:02:33Z"),
"configVersion" : 2,
"self" : true,
"lastHeartbeatMessage" : ""
},
{
"_id" : 1,
"name" : "192.168.2.187:27017",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 122,
"optime" : {
"ts" : Timestamp(1584882254, 1),
"t" : NumberLong(3)
},
"optimeDurable" : {
"ts" : Timestamp(1584882254, 1),
"t" : NumberLong(3)
},
"optimeDate" : ISODate("2020-03-22T13:04:14Z"),
"optimeDurableDate" : ISODate("2020-03-22T13:04:14Z"),
"lastHeartbeat" : ISODate("2020-03-22T13:04:24.023Z"),
"lastHeartbeatRecv" : ISODate("2020-03-22T13:04:23.967Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "192.168.2.222:27017",
"syncSourceHost" : "192.168.2.222:27017",
"syncSourceId" : 0,
"infoMessage" : "",
"configVersion" : 2
},
{
"_id" : 2,
"name" : "192.168.2.188:27017",
"health" : 1,
"state" : 7,
"stateStr" : "ARBITER",
"uptime" : 122,
"lastHeartbeat" : ISODate("2020-03-22T13:04:24.019Z"),
"lastHeartbeatRecv" : ISODate("2020-03-22T13:04:24.112Z"),
"pingMs" : NumberLong(0),
"lastHeartbeatMessage" : "",
"syncingTo" : "",
"syncSourceHost" : "",
"syncSourceId" : -1,
"infoMessage" : "",
"configVersion" : 2
}
],
"ok" : 1,
"$clusterTime" : {
"clusterTime" : Timestamp(1584882264, 1),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
},
"operationTime" : Timestamp(1584882264, 1)
}
2.187节点自动变成从节点
cjcmonset:PRIMARY>
cjcmonset:PRIMARY>
cjcmonset:SECONDARY>
cjcmonset:SECONDARY>
看完上述内容是否对您有帮助呢?如果还想对相关知识有进一步的了解或阅读更多相关文章,请关注创新互联行业资讯频道,感谢您对创新互联的支持。
本文名称:MongoDB中怎么切换副本集故障
当前URL:http://scyanting.com/article/gsjpdi.html