View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.chaos.actions;
20  
21  import java.util.HashSet;
22  import java.util.List;
23  import java.util.Set;
24  
25  import org.apache.hadoop.hbase.ServerName;
26  import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
27  
28  /**
29   * Restarts a ratio of the running regionservers at the same time
30   */
31  public class BatchRestartRsAction extends RestartActionBaseAction {
32    float ratio; //ratio of regionservers to restart
33  
34    public BatchRestartRsAction(long sleepTime, float ratio) {
35      super(sleepTime);
36      this.ratio = ratio;
37    }
38  
39    @Override
40    public void perform() throws Exception {
41      LOG.info(String.format("Performing action: Batch restarting %d%% of region servers",
42          (int)(ratio * 100)));
43      List<ServerName> selectedServers = PolicyBasedChaosMonkey.selectRandomItems(getCurrentServers(),
44          ratio);
45  
46      Set<ServerName> killedServers = new HashSet<ServerName>();
47  
48      for (ServerName server : selectedServers) {
49        // Don't keep killing servers if we're
50        // trying to stop the monkey.
51        if (context.isStopping()) {
52          break;
53        }
54        LOG.info("Killing region server:" + server);
55        cluster.killRegionServer(server);
56        killedServers.add(server);
57      }
58  
59      for (ServerName server : killedServers) {
60        cluster.waitForRegionServerToStop(server, PolicyBasedChaosMonkey.TIMEOUT);
61      }
62  
63      LOG.info("Killed " + killedServers.size() + " region servers. Reported num of rs:"
64          + cluster.getClusterStatus().getServersSize());
65  
66      sleep(sleepTime);
67  
68      for (ServerName server : killedServers) {
69        LOG.info("Starting region server:" + server.getHostname());
70        cluster.startRegionServer(server.getHostname(), server.getPort());
71  
72      }
73      for (ServerName server : killedServers) {
74        cluster.waitForRegionServerToStart(server.getHostname(),
75            server.getPort(),
76            PolicyBasedChaosMonkey.TIMEOUT);
77      }
78      LOG.info("Started " + killedServers.size() +" region servers. Reported num of rs:"
79          + cluster.getClusterStatus().getServersSize());
80    }
81  }