1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.util;
20
21 import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
22 import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertNoErrors;
23 import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
24 import static org.junit.Assert.assertEquals;
25 import static org.junit.Assert.assertFalse;
26 import static org.junit.Assert.assertNotEquals;
27 import static org.junit.Assert.assertNotNull;
28 import static org.junit.Assert.assertTrue;
29 import static org.junit.Assert.fail;
30
31 import java.io.IOException;
32 import java.util.ArrayList;
33 import java.util.Arrays;
34 import java.util.Collection;
35 import java.util.HashMap;
36 import java.util.HashSet;
37 import java.util.LinkedList;
38 import java.util.List;
39 import java.util.Map;
40 import java.util.Random;
41 import java.util.Map.Entry;
42 import java.util.NavigableMap;
43 import java.util.Set;
44 import java.util.concurrent.Callable;
45 import java.util.concurrent.CountDownLatch;
46 import java.util.concurrent.ExecutorService;
47 import java.util.concurrent.Executors;
48 import java.util.concurrent.Future;
49 import java.util.concurrent.ScheduledThreadPoolExecutor;
50 import java.util.concurrent.SynchronousQueue;
51 import java.util.concurrent.ThreadPoolExecutor;
52 import java.util.concurrent.TimeUnit;
53 import java.util.concurrent.atomic.AtomicBoolean;
54
55 import org.apache.commons.io.IOUtils;
56 import org.apache.commons.logging.Log;
57 import org.apache.commons.logging.LogFactory;
58 import org.apache.hadoop.conf.Configuration;
59 import org.apache.hadoop.fs.FileStatus;
60 import org.apache.hadoop.fs.FileSystem;
61 import org.apache.hadoop.fs.Path;
62 import org.apache.hadoop.hbase.ClusterStatus;
63 import org.apache.hadoop.hbase.HBaseTestingUtility;
64 import org.apache.hadoop.hbase.HColumnDescriptor;
65 import org.apache.hadoop.hbase.HConstants;
66 import org.apache.hadoop.hbase.HRegionInfo;
67 import org.apache.hadoop.hbase.HRegionLocation;
68 import org.apache.hadoop.hbase.HTableDescriptor;
69 import org.apache.hadoop.hbase.TableExistsException;
70 import org.apache.hadoop.hbase.testclassification.LargeTests;
71 import org.apache.hadoop.hbase.MiniHBaseCluster;
72 import org.apache.hadoop.hbase.RegionLocations;
73 import org.apache.hadoop.hbase.ServerName;
74 import org.apache.hadoop.hbase.TableName;
75 import org.apache.hadoop.hbase.MetaTableAccessor;
76 import org.apache.hadoop.hbase.client.Admin;
77 import org.apache.hadoop.hbase.client.ClusterConnection;
78 import org.apache.hadoop.hbase.client.Connection;
79 import org.apache.hadoop.hbase.client.ConnectionFactory;
80 import org.apache.hadoop.hbase.client.Delete;
81 import org.apache.hadoop.hbase.client.Durability;
82 import org.apache.hadoop.hbase.client.Get;
83 import org.apache.hadoop.hbase.client.HBaseAdmin;
84 import org.apache.hadoop.hbase.client.HConnection;
85 import org.apache.hadoop.hbase.client.HTable;
86 import org.apache.hadoop.hbase.client.MetaScanner;
87 import org.apache.hadoop.hbase.client.Mutation;
88 import org.apache.hadoop.hbase.client.Put;
89 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
90 import org.apache.hadoop.hbase.client.Result;
91 import org.apache.hadoop.hbase.client.ResultScanner;
92 import org.apache.hadoop.hbase.client.RowMutations;
93 import org.apache.hadoop.hbase.client.Scan;
94 import org.apache.hadoop.hbase.client.Table;
95 import org.apache.hadoop.hbase.coprocessor.BaseMasterObserver;
96 import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
97 import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
98 import org.apache.hadoop.hbase.coprocessor.ObserverContext;
99 import org.apache.hadoop.hbase.io.hfile.TestHFile;
100 import org.apache.hadoop.hbase.master.AssignmentManager;
101 import org.apache.hadoop.hbase.master.HMaster;
102 import org.apache.hadoop.hbase.master.RegionState;
103 import org.apache.hadoop.hbase.master.RegionStates;
104 import org.apache.hadoop.hbase.master.TableLockManager;
105 import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
106 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
107 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
108 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
109 import org.apache.hadoop.hbase.regionserver.HRegion;
110 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
111 import org.apache.hadoop.hbase.regionserver.HRegionServer;
112 import org.apache.hadoop.hbase.regionserver.SplitTransactionImpl;
113 import org.apache.hadoop.hbase.regionserver.TestEndToEndSplitTransaction;
114 import org.apache.hadoop.hbase.testclassification.LargeTests;
115 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter;
116 import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
117 import org.apache.hadoop.hbase.util.HBaseFsck.HbckInfo;
118 import org.apache.hadoop.hbase.util.HBaseFsck.PrintingErrorReporter;
119 import org.apache.hadoop.hbase.util.HBaseFsck.TableInfo;
120 import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
121 import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
122 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
123 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
124 import org.apache.zookeeper.KeeperException;
125 import org.junit.AfterClass;
126 import org.junit.Assert;
127 import org.junit.Before;
128 import org.junit.BeforeClass;
129 import org.junit.Ignore;
130 import org.junit.Test;
131 import org.junit.experimental.categories.Category;
132 import org.junit.rules.TestName;
133
134 import com.google.common.collect.Multimap;
135
136
137
138
139 @Category(LargeTests.class)
140 public class TestHBaseFsck {
// Size used for the client thread-pool settings, the table executor's maximum
// and the hbck executor (see setUpBeforeClass).
static final int POOL_SIZE = 7;
private static final Log LOG = LogFactory.getLog(TestHBaseFsck.class);
// Shared mini-cluster harness and the configuration object it exposes.
private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
private final static Configuration conf = TEST_UTIL.getConfiguration();
// Name of the single column family the test tables are created with.
private final static String FAM_STR = "fam";
private final static byte[] FAM = Bytes.toBytes(FAM_STR);
// Base timeout value; setUpBeforeClass derives the hbck close timeout (2x) and
// the RPC timeout (8x) from it. NOTE(review): presumably milliseconds - confirm.
private final static int REGION_ONLINE_TIMEOUT = 800;
// The following statics are initialised once in setUpBeforeClass.
private static RegionStates regionStates;
private static ExecutorService tableExecutorService;
private static ScheduledThreadPoolExecutor hbfsckExecutorService;
private static ClusterConnection connection;
private static Admin admin;


// Table under test; assigned by setupTableWithRegionReplica and cleared by cleanupTable.
private HTable tbl;
// Split points passed to createTable when a test table is set up.
private final static byte[][] SPLITS = new byte[][] { Bytes.toBytes("A"),
  Bytes.toBytes("B"), Bytes.toBytes("C") };

// Row keys written into every test table; tests compare countRows() against ROWKEYS.length.
private final static byte[][] ROWKEYS= new byte[][] {
  Bytes.toBytes("00"), Bytes.toBytes("50"), Bytes.toBytes("A0"), Bytes.toBytes("A5"),
  Bytes.toBytes("B0"), Bytes.toBytes("B5"), Bytes.toBytes("C0"), Bytes.toBytes("C5") };
162
163 @BeforeClass
164 public static void setUpBeforeClass() throws Exception {
165 TEST_UTIL.getConfiguration().set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
166 MasterSyncObserver.class.getName());
167
168 conf.setInt("hbase.regionserver.handler.count", 2);
169 conf.setInt("hbase.regionserver.metahandler.count", 30);
170
171 conf.setInt("hbase.htable.threads.max", POOL_SIZE);
172 conf.setInt("hbase.hconnection.threads.max", 2 * POOL_SIZE);
173 conf.setInt("hbase.hconnection.threads.core", POOL_SIZE);
174 conf.setInt("hbase.hbck.close.timeout", 2 * REGION_ONLINE_TIMEOUT);
175 conf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, 8 * REGION_ONLINE_TIMEOUT);
176 TEST_UTIL.startMiniCluster(3);
177
178 tableExecutorService = new ThreadPoolExecutor(1, POOL_SIZE, 60, TimeUnit.SECONDS,
179 new SynchronousQueue<Runnable>(), Threads.newDaemonThreadFactory("testhbck"));
180
181 hbfsckExecutorService = new ScheduledThreadPoolExecutor(POOL_SIZE);
182
183 AssignmentManager assignmentManager =
184 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
185 regionStates = assignmentManager.getRegionStates();
186
187 connection = (ClusterConnection) TEST_UTIL.getConnection();
188
189 admin = connection.getAdmin();
190 admin.setBalancerRunning(false, true);
191
192 TEST_UTIL.waitUntilAllRegionsAssigned(TableName.META_TABLE_NAME);
193 TEST_UTIL.waitUntilAllRegionsAssigned(TableName.NAMESPACE_TABLE_NAME);
194 }
195
196 @AfterClass
197 public static void tearDownAfterClass() throws Exception {
198 tableExecutorService.shutdown();
199 hbfsckExecutorService.shutdown();
200 admin.close();
201 TEST_UTIL.shutdownMiniCluster();
202 }
203
204 @Before
205 public void setUp() {
206 EnvironmentEdgeManager.reset();
207 }
208
209 @Test (timeout=180000)
210 public void testHBaseFsck() throws Exception {
211 assertNoErrors(doFsck(conf, false));
212 TableName table = TableName.valueOf("tableBadMetaAssign");
213 HTableDescriptor desc = new HTableDescriptor(table);
214 HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
215 desc.addFamily(hcd);
216 createTable(TEST_UTIL, desc, null);
217
218
219 assertNoErrors(doFsck(conf, false));
220
221
222
223 Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
224 Scan scan = new Scan();
225 scan.setStartRow(Bytes.toBytes(table+",,"));
226 ResultScanner scanner = meta.getScanner(scan);
227 HRegionInfo hri = null;
228
229 Result res = scanner.next();
230 ServerName currServer =
231 ServerName.parseFrom(res.getValue(HConstants.CATALOG_FAMILY,
232 HConstants.SERVER_QUALIFIER));
233 long startCode = Bytes.toLong(res.getValue(HConstants.CATALOG_FAMILY,
234 HConstants.STARTCODE_QUALIFIER));
235
236 for (JVMClusterUtil.RegionServerThread rs :
237 TEST_UTIL.getHBaseCluster().getRegionServerThreads()) {
238
239 ServerName sn = rs.getRegionServer().getServerName();
240
241
242 if (!currServer.getHostAndPort().equals(sn.getHostAndPort()) ||
243 startCode != sn.getStartcode()) {
244 Put put = new Put(res.getRow());
245 put.setDurability(Durability.SKIP_WAL);
246 put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
247 Bytes.toBytes(sn.getHostAndPort()));
248 put.add(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
249 Bytes.toBytes(sn.getStartcode()));
250 meta.put(put);
251 hri = MetaTableAccessor.getHRegionInfo(res);
252 break;
253 }
254 }
255
256
257 assertErrors(doFsck(conf, true), new ERROR_CODE[]{
258 ERROR_CODE.SERVER_DOES_NOT_MATCH_META});
259
260 TEST_UTIL.getHBaseCluster().getMaster()
261 .getAssignmentManager().waitForAssignment(hri);
262
263
264 assertNoErrors(doFsck(conf, false));
265
266
267 Table t = connection.getTable(table, tableExecutorService);
268 ResultScanner s = t.getScanner(new Scan());
269 s.close();
270 t.close();
271
272 scanner.close();
273 meta.close();
274 }
275
276 @Test(timeout=180000)
277 public void testFixAssignmentsWhenMETAinTransition() throws Exception {
278 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
279 admin.closeRegion(cluster.getServerHoldingMeta(), HRegionInfo.FIRST_META_REGIONINFO);
280 regionStates.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
281 new MetaTableLocator().deleteMetaLocation(cluster.getMaster().getZooKeeper());
282 assertFalse(regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
283 HBaseFsck hbck = doFsck(conf, true);
284 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.UNKNOWN, ERROR_CODE.NO_META_REGION,
285 ERROR_CODE.NULL_META_REGION });
286 assertNoErrors(doFsck(conf, false));
287 }
288
289
290
291
292 private HRegionInfo createRegion(final HTableDescriptor
293 htd, byte[] startKey, byte[] endKey)
294 throws IOException {
295 Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
296 HRegionInfo hri = new HRegionInfo(htd.getTableName(), startKey, endKey);
297 MetaTableAccessor.addRegionToMeta(meta, hri);
298 meta.close();
299 return hri;
300 }
301
302
303
304
305 private void dumpMeta(TableName tableName) throws IOException {
306 List<byte[]> metaRows = TEST_UTIL.getMetaTableRows(tableName);
307 for (byte[] row : metaRows) {
308 LOG.info(Bytes.toString(row));
309 }
310 }
311
312
313
314
315
316 private void undeployRegion(Connection conn, ServerName sn,
317 HRegionInfo hri) throws IOException, InterruptedException {
318 try {
319 HBaseFsckRepair.closeRegionSilentlyAndWait((HConnection) conn, sn, hri);
320 if (!hri.isMetaTable()) {
321 admin.offline(hri.getRegionName());
322 }
323 } catch (IOException ioe) {
324 LOG.warn("Got exception when attempting to offline region "
325 + Bytes.toString(hri.getRegionName()), ioe);
326 }
327 }
328
329
330
331
332
333
334 private void deleteRegion(Configuration conf, final HTableDescriptor htd,
335 byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
336 boolean hdfs) throws IOException, InterruptedException {
337 deleteRegion(conf, htd, startKey, endKey, unassign, metaRow, hdfs, false, HRegionInfo.DEFAULT_REPLICA_ID);
338 }
339
340
341
342
343
344
345
346
347
348 private void deleteRegion(Configuration conf, final HTableDescriptor htd,
349 byte[] startKey, byte[] endKey, boolean unassign, boolean metaRow,
350 boolean hdfs, boolean regionInfoOnly, int replicaId)
351 throws IOException, InterruptedException {
352 LOG.info("** Before delete:");
353 dumpMeta(htd.getTableName());
354
355 List<HRegionLocation> locations = tbl.getAllRegionLocations();
356 for (HRegionLocation location : locations) {
357 HRegionInfo hri = location.getRegionInfo();
358 ServerName hsa = location.getServerName();
359 if (Bytes.compareTo(hri.getStartKey(), startKey) == 0
360 && Bytes.compareTo(hri.getEndKey(), endKey) == 0
361 && hri.getReplicaId() == replicaId) {
362
363 LOG.info("RegionName: " +hri.getRegionNameAsString());
364 byte[] deleteRow = hri.getRegionName();
365
366 if (unassign) {
367 LOG.info("Undeploying region " + hri + " from server " + hsa);
368 undeployRegion(connection, hsa, hri);
369 }
370
371 if (regionInfoOnly) {
372 LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
373 Path rootDir = FSUtils.getRootDir(conf);
374 FileSystem fs = rootDir.getFileSystem(conf);
375 Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
376 hri.getEncodedName());
377 Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
378 fs.delete(hriPath, true);
379 }
380
381 if (hdfs) {
382 LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
383 Path rootDir = FSUtils.getRootDir(conf);
384 FileSystem fs = rootDir.getFileSystem(conf);
385 Path p = new Path(FSUtils.getTableDir(rootDir, htd.getTableName()),
386 hri.getEncodedName());
387 HBaseFsck.debugLsr(conf, p);
388 boolean success = fs.delete(p, true);
389 LOG.info("Deleted " + p + " sucessfully? " + success);
390 HBaseFsck.debugLsr(conf, p);
391 }
392
393 if (metaRow) {
394 try (Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService)) {
395 Delete delete = new Delete(deleteRow);
396 meta.delete(delete);
397 }
398 }
399 }
400 LOG.info(hri.toString() + hsa.toString());
401 }
402
403 TEST_UTIL.getMetaTableRows(htd.getTableName());
404 LOG.info("*** After delete:");
405 dumpMeta(htd.getTableName());
406 }
407
408
409
410
411
412
413
414
415
416
417 void setupTable(TableName tablename) throws Exception {
418 setupTableWithRegionReplica(tablename, 1);
419 }
420
421
422
423
424
425
426
427
428
429
430 void setupTableWithRegionReplica(TableName tablename, int replicaCount) throws Exception {
431 HTableDescriptor desc = new HTableDescriptor(tablename);
432 desc.setRegionReplication(replicaCount);
433 HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
434 desc.addFamily(hcd);
435 createTable(TEST_UTIL, desc, SPLITS);
436
437 tbl = (HTable) connection.getTable(tablename, tableExecutorService);
438 List<Put> puts = new ArrayList<Put>();
439 for (byte[] row : ROWKEYS) {
440 Put p = new Put(row);
441 p.add(FAM, Bytes.toBytes("val"), row);
442 puts.add(p);
443 }
444 tbl.put(puts);
445 tbl.flushCommits();
446 }
447
448
449
450
451 int countRows() throws IOException {
452 Scan s = new Scan();
453 ResultScanner rs = tbl.getScanner(s);
454 int i = 0;
455 while(rs.next() !=null) {
456 i++;
457 }
458 return i;
459 }
460
461
462
463
464
465
466
467 void cleanupTable(TableName tablename) throws Exception {
468 if (tbl != null) {
469 tbl.close();
470 tbl = null;
471 }
472
473 ((ClusterConnection) connection).clearRegionCache();
474 deleteTable(TEST_UTIL, tablename);
475 }
476
477
478
479
480 @Test (timeout=180000)
481 public void testHBaseFsckClean() throws Exception {
482 assertNoErrors(doFsck(conf, false));
483 TableName table = TableName.valueOf("tableClean");
484 try {
485 HBaseFsck hbck = doFsck(conf, false);
486 assertNoErrors(hbck);
487
488 setupTable(table);
489 assertEquals(ROWKEYS.length, countRows());
490
491
492 hbck = doFsck(conf, false);
493 assertNoErrors(hbck);
494 assertEquals(0, hbck.getOverlapGroups(table).size());
495 assertEquals(ROWKEYS.length, countRows());
496 } finally {
497 cleanupTable(table);
498 }
499 }
500
501
502
503
504 @Test (timeout=180000)
505 public void testHbckThreadpooling() throws Exception {
506 TableName table =
507 TableName.valueOf("tableDupeStartKey");
508 try {
509
510 setupTable(table);
511
512
513 Configuration newconf = new Configuration(conf);
514 newconf.setInt("hbasefsck.numthreads", 1);
515 assertNoErrors(doFsck(newconf, false));
516
517
518 } finally {
519 cleanupTable(table);
520 }
521 }
522
523 @Test (timeout=180000)
524 public void testHbckFixOrphanTable() throws Exception {
525 TableName table = TableName.valueOf("tableInfo");
526 FileSystem fs = null;
527 Path tableinfo = null;
528 try {
529 setupTable(table);
530
531 Path hbaseTableDir = FSUtils.getTableDir(
532 FSUtils.getRootDir(conf), table);
533 fs = hbaseTableDir.getFileSystem(conf);
534 FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
535 tableinfo = status.getPath();
536 fs.rename(tableinfo, new Path("/.tableinfo"));
537
538
539 HBaseFsck hbck = doFsck(conf, false);
540 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_TABLEINFO_FILE });
541
542
543 hbck = doFsck(conf, true);
544 assertNoErrors(hbck);
545 status = null;
546 status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
547 assertNotNull(status);
548
549 HTableDescriptor htd = admin.getTableDescriptor(table);
550 htd.setValue("NOT_DEFAULT", "true");
551 admin.disableTable(table);
552 admin.modifyTable(table, htd);
553 admin.enableTable(table);
554 fs.delete(status.getPath(), true);
555
556
557 htd = admin.getTableDescriptor(table);
558 hbck = doFsck(conf, true);
559 assertNoErrors(hbck);
560 status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
561 assertNotNull(status);
562 htd = admin.getTableDescriptor(table);
563 assertEquals(htd.getValue("NOT_DEFAULT"), "true");
564 } finally {
565 fs.rename(new Path("/.tableinfo"), tableinfo);
566 cleanupTable(table);
567 }
568 }
569
570
571
572
573
574
575 @Test (timeout=180000)
576 public void testParallelHbck() throws Exception {
577 final ExecutorService service;
578 final Future<HBaseFsck> hbck1,hbck2;
579
580 class RunHbck implements Callable<HBaseFsck>{
581 boolean fail = true;
582 @Override
583 public HBaseFsck call(){
584 Configuration c = new Configuration(conf);
585 c.setInt("hbase.hbck.lockfile.attempts", 1);
586
587
588 c.setInt("hbase.hbck.lockfile.maxwaittime", 3);
589 try{
590 return doFsck(c, true);
591 } catch(Exception e){
592 if (e.getMessage().contains("Duplicate hbck")) {
593 fail = false;
594 }
595 }
596
597 if (fail) fail();
598 return null;
599 }
600 }
601 service = Executors.newFixedThreadPool(2);
602 hbck1 = service.submit(new RunHbck());
603 hbck2 = service.submit(new RunHbck());
604 service.shutdown();
605
606 service.awaitTermination(15, TimeUnit.SECONDS);
607 HBaseFsck h1 = hbck1.get();
608 HBaseFsck h2 = hbck2.get();
609
610 assert(h1 == null || h2 == null);
611 if (h1 != null) {
612 assert(h1.getRetCode() >= 0);
613 }
614 if (h2 != null) {
615 assert(h2.getRetCode() >= 0);
616 }
617 }
618
619
620
621
622
623
624
625 @Test (timeout=180000)
626 public void testParallelWithRetriesHbck() throws Exception {
627 final ExecutorService service;
628 final Future<HBaseFsck> hbck1,hbck2;
629
630
631
632
633
634
635
636 final int timeoutInSeconds = 80;
637 final int sleepIntervalInMilliseconds = 200;
638 final int maxSleepTimeInMilliseconds = 6000;
639 final int maxRetryAttempts = 15;
640
641 class RunHbck implements Callable<HBaseFsck>{
642
643 @Override
644 public HBaseFsck call() throws Exception {
645
646 Configuration c = new Configuration(conf);
647 c.setInt("hbase.hbck.lockfile.maxwaittime", timeoutInSeconds);
648 c.setInt("hbase.hbck.lockfile.attempt.sleep.interval", sleepIntervalInMilliseconds);
649 c.setInt("hbase.hbck.lockfile.attempt.maxsleeptime", maxSleepTimeInMilliseconds);
650 c.setInt("hbase.hbck.lockfile.attempts", maxRetryAttempts);
651 return doFsck(c, false);
652 }
653 }
654
655 service = Executors.newFixedThreadPool(2);
656 hbck1 = service.submit(new RunHbck());
657 hbck2 = service.submit(new RunHbck());
658 service.shutdown();
659
660 service.awaitTermination(timeoutInSeconds * 2, TimeUnit.SECONDS);
661 HBaseFsck h1 = hbck1.get();
662 HBaseFsck h2 = hbck2.get();
663
664 assertNotNull(h1);
665 assertNotNull(h2);
666 assert(h1.getRetCode() >= 0);
667 assert(h2.getRetCode() >= 0);
668
669 }
670
671
672
673
674
675 @Test (timeout=180000)
676 public void testDupeStartKey() throws Exception {
677 TableName table =
678 TableName.valueOf("tableDupeStartKey");
679 try {
680 setupTable(table);
681 assertNoErrors(doFsck(conf, false));
682 assertEquals(ROWKEYS.length, countRows());
683
684
685 HRegionInfo hriDupe =
686 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("A2"));
687 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
688 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
689 .waitForAssignment(hriDupe);
690 ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
691 TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
692
693 HBaseFsck hbck = doFsck(conf, false);
694 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
695 ERROR_CODE.DUPE_STARTKEYS});
696 assertEquals(2, hbck.getOverlapGroups(table).size());
697 assertEquals(ROWKEYS.length, countRows());
698
699
700 doFsck(conf,true);
701
702
703 HBaseFsck hbck2 = doFsck(conf,false);
704 assertNoErrors(hbck2);
705 assertEquals(0, hbck2.getOverlapGroups(table).size());
706 assertEquals(ROWKEYS.length, countRows());
707 } finally {
708 cleanupTable(table);
709 }
710 }
711
712
713
714
715
716 @Test (timeout=180000)
717 public void testHbckWithRegionReplica() throws Exception {
718 TableName table =
719 TableName.valueOf("testHbckWithRegionReplica");
720 try {
721 setupTableWithRegionReplica(table, 2);
722 TEST_UTIL.getHBaseAdmin().flush(table.getName());
723 assertNoErrors(doFsck(conf, false));
724 } finally {
725 cleanupTable(table);
726 }
727 }
728
729 @Test
730 public void testHbckWithFewerReplica() throws Exception {
731 TableName table =
732 TableName.valueOf("testHbckWithFewerReplica");
733 try {
734 setupTableWithRegionReplica(table, 2);
735 TEST_UTIL.getHBaseAdmin().flush(table.getName());
736 assertNoErrors(doFsck(conf, false));
737 assertEquals(ROWKEYS.length, countRows());
738 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
739 Bytes.toBytes("C"), true, false, false, false, 1);
740
741 HBaseFsck hbck = doFsck(conf, false);
742 assertErrors(hbck, new ERROR_CODE[]{ERROR_CODE.NOT_DEPLOYED});
743
744 hbck = doFsck(conf, true);
745
746 hbck = doFsck(conf, false);
747 assertErrors(hbck, new ERROR_CODE[]{});
748 } finally {
749 cleanupTable(table);
750 }
751 }
752
753 @Test
754 public void testHbckWithExcessReplica() throws Exception {
755 TableName table =
756 TableName.valueOf("testHbckWithExcessReplica");
757 try {
758 setupTableWithRegionReplica(table, 2);
759 TEST_UTIL.getHBaseAdmin().flush(table.getName());
760 assertNoErrors(doFsck(conf, false));
761 assertEquals(ROWKEYS.length, countRows());
762
763
764
765
766 HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
767 List<HRegionInfo> regions = TEST_UTIL.getHBaseAdmin().getTableRegions(table);
768 byte[] startKey = Bytes.toBytes("B");
769 byte[] endKey = Bytes.toBytes("C");
770 byte[] metaKey = null;
771 HRegionInfo newHri = null;
772 for (HRegionInfo h : regions) {
773 if (Bytes.compareTo(h.getStartKey(), startKey) == 0 &&
774 Bytes.compareTo(h.getEndKey(), endKey) == 0 &&
775 h.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
776 metaKey = h.getRegionName();
777
778 newHri = RegionReplicaUtil.getRegionInfoForReplica(h, 2);
779 break;
780 }
781 }
782 Put put = new Put(metaKey);
783 ServerName sn = TEST_UTIL.getHBaseAdmin().getClusterStatus().getServers()
784 .toArray(new ServerName[0])[0];
785
786 MetaTableAccessor.addLocation(put, sn, sn.getStartcode(), -1, 2);
787 meta.put(put);
788 meta.flushCommits();
789
790 HBaseFsckRepair.fixUnassigned((HBaseAdmin)TEST_UTIL.getHBaseAdmin(), newHri);
791 HBaseFsckRepair.waitUntilAssigned((HBaseAdmin)TEST_UTIL.getHBaseAdmin(), newHri);
792
793 Delete delete = new Delete(metaKey);
794 delete.deleteColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(2));
795 delete.deleteColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(2));
796 delete.deleteColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getSeqNumColumn(2));
797 meta.delete(delete);
798 meta.flushCommits();
799 meta.close();
800
801 HBaseFsck hbck = doFsck(conf, false);
802 assertErrors(hbck, new ERROR_CODE[]{ERROR_CODE.NOT_IN_META});
803
804 hbck = doFsck(conf, true);
805
806 hbck = doFsck(conf, false);
807 assertErrors(hbck, new ERROR_CODE[]{});
808 } finally {
809 cleanupTable(table);
810 }
811 }
812
813
814
815 Map<ServerName, List<String>> getDeployedHRIs(final HBaseAdmin admin) throws IOException {
816 ClusterStatus status = admin.getClusterStatus();
817 Collection<ServerName> regionServers = status.getServers();
818 Map<ServerName, List<String>> mm =
819 new HashMap<ServerName, List<String>>();
820 for (ServerName hsi : regionServers) {
821 AdminProtos.AdminService.BlockingInterface server = ((HConnection) connection).getAdmin(hsi);
822
823
824 List<HRegionInfo> regions = ProtobufUtil.getOnlineRegions(server);
825 List<String> regionNames = new ArrayList<String>();
826 for (HRegionInfo hri : regions) {
827 regionNames.add(hri.getRegionNameAsString());
828 }
829 mm.put(hsi, regionNames);
830 }
831 return mm;
832 }
833
834
835
836
837 ServerName findDeployedHSI(Map<ServerName, List<String>> mm, HRegionInfo hri) {
838 for (Map.Entry<ServerName,List <String>> e : mm.entrySet()) {
839 if (e.getValue().contains(hri.getRegionNameAsString())) {
840 return e.getKey();
841 }
842 }
843 return null;
844 }
845
846
847
848
849
850 @Test (timeout=180000)
851 public void testDupeRegion() throws Exception {
852 TableName table =
853 TableName.valueOf("tableDupeRegion");
854 try {
855 setupTable(table);
856 assertNoErrors(doFsck(conf, false));
857 assertEquals(ROWKEYS.length, countRows());
858
859
860 HRegionInfo hriDupe =
861 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("B"));
862
863 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
864 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
865 .waitForAssignment(hriDupe);
866 ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
867 TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
868
869
870
871
872
873 while (findDeployedHSI(getDeployedHRIs((HBaseAdmin) admin), hriDupe) == null) {
874 Thread.sleep(250);
875 }
876
877 LOG.debug("Finished assignment of dupe region");
878
879
880 HBaseFsck hbck = doFsck(conf, false);
881 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DUPE_STARTKEYS,
882 ERROR_CODE.DUPE_STARTKEYS});
883 assertEquals(2, hbck.getOverlapGroups(table).size());
884 assertEquals(ROWKEYS.length, countRows());
885
886
887 doFsck(conf,true);
888
889
890 HBaseFsck hbck2 = doFsck(conf,false);
891 assertNoErrors(hbck2);
892 assertEquals(0, hbck2.getOverlapGroups(table).size());
893 assertEquals(ROWKEYS.length, countRows());
894 } finally {
895 cleanupTable(table);
896 }
897 }
898
899
900
901
902 @Test (timeout=180000)
903 public void testDegenerateRegions() throws Exception {
904 TableName table = TableName.valueOf("tableDegenerateRegions");
905 try {
906 setupTable(table);
907 assertNoErrors(doFsck(conf,false));
908 assertEquals(ROWKEYS.length, countRows());
909
910
911 HRegionInfo hriDupe =
912 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("B"), Bytes.toBytes("B"));
913 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriDupe);
914 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
915 .waitForAssignment(hriDupe);
916 ServerName server = regionStates.getRegionServerOfRegion(hriDupe);
917 TEST_UTIL.assertRegionOnServer(hriDupe, server, REGION_ONLINE_TIMEOUT);
918
919 HBaseFsck hbck = doFsck(conf,false);
920 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.DEGENERATE_REGION, ERROR_CODE.DUPE_STARTKEYS,
921 ERROR_CODE.DUPE_STARTKEYS });
922 assertEquals(2, hbck.getOverlapGroups(table).size());
923 assertEquals(ROWKEYS.length, countRows());
924
925
926 doFsck(conf,true);
927
928
929 HBaseFsck hbck2 = doFsck(conf,false);
930 assertNoErrors(hbck2);
931 assertEquals(0, hbck2.getOverlapGroups(table).size());
932 assertEquals(ROWKEYS.length, countRows());
933 } finally {
934 cleanupTable(table);
935 }
936 }
937
938
939
940
941
942 @Test (timeout=180000)
943 public void testContainedRegionOverlap() throws Exception {
944 TableName table =
945 TableName.valueOf("tableContainedRegionOverlap");
946 try {
947 setupTable(table);
948 assertEquals(ROWKEYS.length, countRows());
949
950
951 HRegionInfo hriOverlap =
952 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
953 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
954 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
955 .waitForAssignment(hriOverlap);
956 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
957 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
958
959 HBaseFsck hbck = doFsck(conf, false);
960 assertErrors(hbck, new ERROR_CODE[] {
961 ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
962 assertEquals(2, hbck.getOverlapGroups(table).size());
963 assertEquals(ROWKEYS.length, countRows());
964
965
966 doFsck(conf, true);
967
968
969 HBaseFsck hbck2 = doFsck(conf,false);
970 assertNoErrors(hbck2);
971 assertEquals(0, hbck2.getOverlapGroups(table).size());
972 assertEquals(ROWKEYS.length, countRows());
973 } finally {
974 cleanupTable(table);
975 }
976 }
977
978
979
980
981
982
983
984 @Test (timeout=180000)
985 public void testSidelineOverlapRegion() throws Exception {
986 TableName table =
987 TableName.valueOf("testSidelineOverlapRegion");
988 try {
989 setupTable(table);
990 assertEquals(ROWKEYS.length, countRows());
991
992
993 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
994 HMaster master = cluster.getMaster();
995 HRegionInfo hriOverlap1 =
996 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A"), Bytes.toBytes("AB"));
997 master.assignRegion(hriOverlap1);
998 master.getAssignmentManager().waitForAssignment(hriOverlap1);
999 HRegionInfo hriOverlap2 =
1000 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("AB"), Bytes.toBytes("B"));
1001 master.assignRegion(hriOverlap2);
1002 master.getAssignmentManager().waitForAssignment(hriOverlap2);
1003
1004 HBaseFsck hbck = doFsck(conf, false);
1005 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.DUPE_STARTKEYS,
1006 ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.OVERLAP_IN_REGION_CHAIN});
1007 assertEquals(3, hbck.getOverlapGroups(table).size());
1008 assertEquals(ROWKEYS.length, countRows());
1009
1010
1011 Multimap<byte[], HbckInfo> overlapGroups = hbck.getOverlapGroups(table);
1012 ServerName serverName = null;
1013 byte[] regionName = null;
1014 for (HbckInfo hbi: overlapGroups.values()) {
1015 if ("A".equals(Bytes.toString(hbi.getStartKey()))
1016 && "B".equals(Bytes.toString(hbi.getEndKey()))) {
1017 regionName = hbi.getRegionName();
1018
1019
1020 int k = cluster.getServerWith(regionName);
1021 for (int i = 0; i < 3; i++) {
1022 if (i != k) {
1023 HRegionServer rs = cluster.getRegionServer(i);
1024 serverName = rs.getServerName();
1025 break;
1026 }
1027 }
1028
1029 HBaseFsckRepair.closeRegionSilentlyAndWait((HConnection) connection,
1030 cluster.getRegionServer(k).getServerName(), hbi.getHdfsHRI());
1031 admin.offline(regionName);
1032 break;
1033 }
1034 }
1035
1036 assertNotNull(regionName);
1037 assertNotNull(serverName);
1038 try (Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService)) {
1039 Put put = new Put(regionName);
1040 put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
1041 Bytes.toBytes(serverName.getHostAndPort()));
1042 meta.put(put);
1043 }
1044
1045
1046 HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
1047 fsck.connect();
1048 fsck.setDisplayFullReport();
1049 fsck.setTimeLag(0);
1050 fsck.setFixAssignments(true);
1051 fsck.setFixMeta(true);
1052 fsck.setFixHdfsHoles(true);
1053 fsck.setFixHdfsOverlaps(true);
1054 fsck.setFixHdfsOrphans(true);
1055 fsck.setFixVersionFile(true);
1056 fsck.setSidelineBigOverlaps(true);
1057 fsck.setMaxMerge(2);
1058 fsck.onlineHbck();
1059 fsck.close();
1060
1061
1062
1063 HBaseFsck hbck2 = doFsck(conf,false);
1064 assertNoErrors(hbck2);
1065 assertEquals(0, hbck2.getOverlapGroups(table).size());
1066 assertTrue(ROWKEYS.length > countRows());
1067 } finally {
1068 cleanupTable(table);
1069 }
1070 }
1071
1072
1073
1074
1075
1076 @Test (timeout=180000)
1077 public void testOverlapAndOrphan() throws Exception {
1078 TableName table =
1079 TableName.valueOf("tableOverlapAndOrphan");
1080 try {
1081 setupTable(table);
1082 assertEquals(ROWKEYS.length, countRows());
1083
1084
1085 admin.disableTable(table);
1086 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1087 Bytes.toBytes("B"), true, true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
1088 TEST_UTIL.getHBaseAdmin().enableTable(table);
1089
1090 HRegionInfo hriOverlap =
1091 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
1092 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
1093 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
1094 .waitForAssignment(hriOverlap);
1095 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
1096 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
1097
1098 HBaseFsck hbck = doFsck(conf, false);
1099 assertErrors(hbck, new ERROR_CODE[] {
1100 ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1101 ERROR_CODE.HOLE_IN_REGION_CHAIN});
1102
1103
1104 doFsck(conf, true);
1105
1106
1107 HBaseFsck hbck2 = doFsck(conf,false);
1108 assertNoErrors(hbck2);
1109 assertEquals(0, hbck2.getOverlapGroups(table).size());
1110 assertEquals(ROWKEYS.length, countRows());
1111 } finally {
1112 cleanupTable(table);
1113 }
1114 }
1115
1116
1117
1118
1119
1120
1121 @Test (timeout=180000)
1122 public void testCoveredStartKey() throws Exception {
1123 TableName table =
1124 TableName.valueOf("tableCoveredStartKey");
1125 try {
1126 setupTable(table);
1127 assertEquals(ROWKEYS.length, countRows());
1128
1129
1130 HRegionInfo hriOverlap =
1131 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B2"));
1132 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
1133 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
1134 .waitForAssignment(hriOverlap);
1135 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
1136 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
1137
1138 HBaseFsck hbck = doFsck(conf, false);
1139 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.OVERLAP_IN_REGION_CHAIN,
1140 ERROR_CODE.OVERLAP_IN_REGION_CHAIN });
1141 assertEquals(3, hbck.getOverlapGroups(table).size());
1142 assertEquals(ROWKEYS.length, countRows());
1143
1144
1145 doFsck(conf, true);
1146
1147
1148 HBaseFsck hbck2 = doFsck(conf, false);
1149 assertErrors(hbck2, new ERROR_CODE[0]);
1150 assertEquals(0, hbck2.getOverlapGroups(table).size());
1151 assertEquals(ROWKEYS.length, countRows());
1152 } finally {
1153 cleanupTable(table);
1154 }
1155 }
1156
1157
1158
1159
1160
1161 @Test (timeout=180000)
1162 public void testRegionHole() throws Exception {
1163 TableName table =
1164 TableName.valueOf("tableRegionHole");
1165 try {
1166 setupTable(table);
1167 assertEquals(ROWKEYS.length, countRows());
1168
1169
1170 admin.disableTable(table);
1171 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1172 Bytes.toBytes("C"), true, true, true);
1173 admin.enableTable(table);
1174
1175 HBaseFsck hbck = doFsck(conf, false);
1176 assertErrors(hbck, new ERROR_CODE[] {
1177 ERROR_CODE.HOLE_IN_REGION_CHAIN});
1178
1179 assertEquals(0, hbck.getOverlapGroups(table).size());
1180
1181
1182 doFsck(conf, true);
1183
1184
1185 assertNoErrors(doFsck(conf,false));
1186 assertEquals(ROWKEYS.length - 2 , countRows());
1187 } finally {
1188 cleanupTable(table);
1189 }
1190 }
1191
1192
1193
1194
1195
1196 @Test (timeout=180000)
1197 public void testHDFSRegioninfoMissing() throws Exception {
1198 TableName table = TableName.valueOf("tableHDFSRegioninfoMissing");
1199 try {
1200 setupTable(table);
1201 assertEquals(ROWKEYS.length, countRows());
1202
1203
1204 admin.disableTable(table);
1205 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1206 Bytes.toBytes("C"), true, true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
1207 TEST_UTIL.getHBaseAdmin().enableTable(table);
1208
1209 HBaseFsck hbck = doFsck(conf, false);
1210 assertErrors(hbck, new ERROR_CODE[] {
1211 ERROR_CODE.ORPHAN_HDFS_REGION,
1212 ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1213 ERROR_CODE.HOLE_IN_REGION_CHAIN});
1214
1215 assertEquals(0, hbck.getOverlapGroups(table).size());
1216
1217
1218 doFsck(conf, true);
1219
1220
1221 assertNoErrors(doFsck(conf, false));
1222 assertEquals(ROWKEYS.length, countRows());
1223 } finally {
1224 cleanupTable(table);
1225 }
1226 }
1227
1228
1229
1230
1231
1232 @Test (timeout=180000)
1233 public void testNotInMetaOrDeployedHole() throws Exception {
1234 TableName table =
1235 TableName.valueOf("tableNotInMetaOrDeployedHole");
1236 try {
1237 setupTable(table);
1238 assertEquals(ROWKEYS.length, countRows());
1239
1240
1241 admin.disableTable(table);
1242 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1243 Bytes.toBytes("C"), true, true, false);
1244 admin.enableTable(table);
1245
1246 HBaseFsck hbck = doFsck(conf, false);
1247 assertErrors(hbck, new ERROR_CODE[] {
1248 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1249
1250 assertEquals(0, hbck.getOverlapGroups(table).size());
1251
1252
1253 assertErrors(doFsck(conf, true) , new ERROR_CODE[] {
1254 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1255
1256
1257 assertNoErrors(doFsck(conf,false));
1258 assertEquals(ROWKEYS.length, countRows());
1259 } finally {
1260 cleanupTable(table);
1261 }
1262 }
1263
1264
1265
1266
1267 @Test (timeout=180000)
1268 public void testNotInMetaHole() throws Exception {
1269 TableName table =
1270 TableName.valueOf("tableNotInMetaHole");
1271 try {
1272 setupTable(table);
1273 assertEquals(ROWKEYS.length, countRows());
1274
1275
1276 admin.disableTable(table);
1277 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1278 Bytes.toBytes("C"), false, true, false);
1279 admin.enableTable(table);
1280
1281 HBaseFsck hbck = doFsck(conf, false);
1282 assertErrors(hbck, new ERROR_CODE[] {
1283 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1284
1285 assertEquals(0, hbck.getOverlapGroups(table).size());
1286
1287
1288 assertErrors(doFsck(conf, true) , new ERROR_CODE[] {
1289 ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1290
1291
1292 assertNoErrors(doFsck(conf,false));
1293 assertEquals(ROWKEYS.length, countRows());
1294 } finally {
1295 cleanupTable(table);
1296 }
1297 }
1298
1299
1300
1301
1302
1303 @Test (timeout=180000)
1304 public void testNotInHdfs() throws Exception {
1305 TableName table =
1306 TableName.valueOf("tableNotInHdfs");
1307 try {
1308 setupTable(table);
1309 assertEquals(ROWKEYS.length, countRows());
1310
1311
1312 admin.flush(table);
1313
1314
1315 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1316 Bytes.toBytes("C"), false, false, true);
1317
1318 HBaseFsck hbck = doFsck(conf, false);
1319 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1320
1321 assertEquals(0, hbck.getOverlapGroups(table).size());
1322
1323
1324 doFsck(conf, true);
1325
1326
1327 assertNoErrors(doFsck(conf,false));
1328 assertEquals(ROWKEYS.length - 2, countRows());
1329 } finally {
1330 cleanupTable(table);
1331 }
1332 }
1333
1334
1335
1336
1337
1338 @Test (timeout=180000)
1339 public void testNotInHdfsWithReplicas() throws Exception {
1340 TableName table =
1341 TableName.valueOf("tableNotInHdfs");
1342 HBaseAdmin admin = new HBaseAdmin(conf);
1343 try {
1344 HRegionInfo[] oldHris = new HRegionInfo[2];
1345 setupTableWithRegionReplica(table, 2);
1346 assertEquals(ROWKEYS.length, countRows());
1347 NavigableMap<HRegionInfo, ServerName> map = MetaScanner.allTableRegions(TEST_UTIL.getConnection(),
1348 tbl.getName());
1349 int i = 0;
1350
1351 for (Map.Entry<HRegionInfo, ServerName> m : map.entrySet()) {
1352 if (m.getKey().getStartKey().length > 0 &&
1353 m.getKey().getStartKey()[0] == Bytes.toBytes("B")[0]) {
1354 LOG.debug("Initially server hosting " + m.getKey() + " is " + m.getValue());
1355 oldHris[i++] = m.getKey();
1356 }
1357 }
1358
1359 TEST_UTIL.getHBaseAdmin().flush(table.getName());
1360
1361
1362 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1363 Bytes.toBytes("C"), false, false, true);
1364
1365 HBaseFsck hbck = doFsck(conf, false);
1366 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1367
1368
1369 doFsck(conf, true);
1370
1371
1372 assertNoErrors(doFsck(conf,false));
1373 assertEquals(ROWKEYS.length - 2, countRows());
1374
1375
1376
1377 i = 0;
1378 HRegionInfo[] newHris = new HRegionInfo[2];
1379
1380 map = MetaScanner.allTableRegions(TEST_UTIL.getConnection(), tbl.getName());
1381
1382 for (Map.Entry<HRegionInfo, ServerName> m : map.entrySet()) {
1383 if (m.getKey().getStartKey().length > 0 &&
1384 m.getKey().getStartKey()[0] == Bytes.toBytes("B")[0]) {
1385 newHris[i++] = m.getKey();
1386 }
1387 }
1388
1389 Collection<ServerName> servers = admin.getClusterStatus().getServers();
1390 Set<HRegionInfo> onlineRegions = new HashSet<HRegionInfo>();
1391 for (ServerName s : servers) {
1392 List<HRegionInfo> list = admin.getOnlineRegions(s);
1393 onlineRegions.addAll(list);
1394 }
1395
1396 assertTrue(onlineRegions.containsAll(Arrays.asList(newHris)));
1397
1398
1399 assertFalse(onlineRegions.removeAll(Arrays.asList(oldHris)));
1400 } finally {
1401 cleanupTable(table);
1402 admin.close();
1403 }
1404 }
1405
1406
1407
1408
1409
1410
1411 @Test (timeout=180000)
1412 public void testNoHdfsTable() throws Exception {
1413 TableName table = TableName.valueOf("NoHdfsTable");
1414 setupTable(table);
1415 assertEquals(ROWKEYS.length, countRows());
1416
1417
1418 admin.flush(table);
1419
1420
1421 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""),
1422 Bytes.toBytes("A"), false, false, true);
1423 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1424 Bytes.toBytes("B"), false, false, true);
1425 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1426 Bytes.toBytes("C"), false, false, true);
1427 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"),
1428 Bytes.toBytes(""), false, false, true);
1429
1430
1431 deleteTableDir(table);
1432
1433 HBaseFsck hbck = doFsck(conf, false);
1434 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS,
1435 ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS,
1436 ERROR_CODE.NOT_IN_HDFS,});
1437
1438 assertEquals(0, hbck.getOverlapGroups(table).size());
1439
1440
1441 doFsck(conf, true);
1442
1443
1444 assertNoErrors(doFsck(conf,false));
1445 assertFalse("Table " + table + " should have been deleted", admin.tableExists(table));
1446 }
1447
1448 public void deleteTableDir(TableName table) throws IOException {
1449 Path rootDir = FSUtils.getRootDir(conf);
1450 FileSystem fs = rootDir.getFileSystem(conf);
1451 Path p = FSUtils.getTableDir(rootDir, table);
1452 HBaseFsck.debugLsr(conf, p);
1453 boolean success = fs.delete(p, true);
1454 LOG.info("Deleted " + p + " sucessfully? " + success);
1455 }
1456
1457
1458
1459
1460 @Test (timeout=180000)
1461 public void testNoVersionFile() throws Exception {
1462
1463 Path rootDir = FSUtils.getRootDir(conf);
1464 FileSystem fs = rootDir.getFileSystem(conf);
1465 Path versionFile = new Path(rootDir, HConstants.VERSION_FILE_NAME);
1466 fs.delete(versionFile, true);
1467
1468
1469 HBaseFsck hbck = doFsck(conf, false);
1470 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_VERSION_FILE });
1471
1472 doFsck(conf, true);
1473
1474
1475 assertNoErrors(doFsck(conf, false));
1476 }
1477
1478
1479
1480
1481 @Test (timeout=180000)
1482 public void testRegionShouldNotBeDeployed() throws Exception {
1483 TableName table =
1484 TableName.valueOf("tableRegionShouldNotBeDeployed");
1485 try {
1486 LOG.info("Starting testRegionShouldNotBeDeployed.");
1487 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
1488 assertTrue(cluster.waitForActiveAndReadyMaster());
1489
1490
1491 byte[][] SPLIT_KEYS = new byte[][] { new byte[0], Bytes.toBytes("aaa"),
1492 Bytes.toBytes("bbb"), Bytes.toBytes("ccc"), Bytes.toBytes("ddd") };
1493 HTableDescriptor htdDisabled = new HTableDescriptor(table);
1494 htdDisabled.addFamily(new HColumnDescriptor(FAM));
1495
1496
1497 FSTableDescriptors fstd = new FSTableDescriptors(conf);
1498 fstd.createTableDescriptor(htdDisabled);
1499 List<HRegionInfo> disabledRegions =
1500 TEST_UTIL.createMultiRegionsInMeta(conf, htdDisabled, SPLIT_KEYS);
1501
1502
1503 HRegionServer hrs = cluster.getRegionServer(0);
1504
1505
1506 admin.disableTable(table);
1507 admin.enableTable(table);
1508
1509
1510 admin.disableTable(table);
1511 HRegionInfo region = disabledRegions.remove(0);
1512 byte[] regionName = region.getRegionName();
1513
1514
1515 assertTrue(cluster.getServerWith(regionName) == -1);
1516
1517
1518
1519
1520
1521 HRegion r = HRegion.openHRegion(
1522 region, htdDisabled, hrs.getWAL(region), conf);
1523 hrs.addToOnlineRegions(r);
1524
1525 HBaseFsck hbck = doFsck(conf, false);
1526 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.SHOULD_NOT_BE_DEPLOYED });
1527
1528
1529 doFsck(conf, true);
1530
1531
1532 assertNoErrors(doFsck(conf, false));
1533 } finally {
1534 admin.enableTable(table);
1535 cleanupTable(table);
1536 }
1537 }
1538
1539
1540
1541
1542 @Test (timeout=180000)
1543 public void testFixByTable() throws Exception {
1544 TableName table1 =
1545 TableName.valueOf("testFixByTable1");
1546 TableName table2 =
1547 TableName.valueOf("testFixByTable2");
1548 try {
1549 setupTable(table1);
1550
1551 admin.flush(table1);
1552
1553 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1554 Bytes.toBytes("C"), false, false, true);
1555
1556 setupTable(table2);
1557
1558 admin.flush(table2);
1559
1560 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1561 Bytes.toBytes("C"), false, false, true);
1562
1563 HBaseFsck hbck = doFsck(conf, false);
1564 assertErrors(hbck, new ERROR_CODE[] {
1565 ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS});
1566
1567
1568 doFsck(conf, true, table1);
1569
1570 assertNoErrors(doFsck(conf, false, table1));
1571
1572 assertErrors(doFsck(conf, false, table2),
1573 new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS});
1574
1575
1576 doFsck(conf, true, table2);
1577
1578 assertNoErrors(doFsck(conf, false));
1579 assertEquals(ROWKEYS.length - 2, countRows());
1580 } finally {
1581 cleanupTable(table1);
1582 cleanupTable(table2);
1583 }
1584 }
1585
1586
1587
1588 @Test (timeout=180000)
1589 public void testLingeringSplitParent() throws Exception {
1590 TableName table =
1591 TableName.valueOf("testLingeringSplitParent");
1592 Table meta = null;
1593 try {
1594 setupTable(table);
1595 assertEquals(ROWKEYS.length, countRows());
1596
1597
1598 admin.flush(table);
1599 HRegionLocation location = tbl.getRegionLocation("B");
1600
1601
1602 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("B"),
1603 Bytes.toBytes("C"), true, true, false);
1604
1605
1606 meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
1607 HRegionInfo hri = location.getRegionInfo();
1608
1609 HRegionInfo a = new HRegionInfo(tbl.getName(),
1610 Bytes.toBytes("B"), Bytes.toBytes("BM"));
1611 HRegionInfo b = new HRegionInfo(tbl.getName(),
1612 Bytes.toBytes("BM"), Bytes.toBytes("C"));
1613
1614 hri.setOffline(true);
1615 hri.setSplit(true);
1616
1617 MetaTableAccessor.addRegionToMeta(meta, hri, a, b);
1618 meta.close();
1619 admin.flush(TableName.META_TABLE_NAME);
1620
1621 HBaseFsck hbck = doFsck(conf, false);
1622 assertErrors(hbck, new ERROR_CODE[] {
1623 ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1624
1625
1626 hbck = doFsck(conf, true);
1627 assertErrors(hbck, new ERROR_CODE[] {
1628 ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN });
1629 assertFalse(hbck.shouldRerun());
1630 hbck = doFsck(conf, false);
1631 assertErrors(hbck, new ERROR_CODE[] {
1632 ERROR_CODE.LINGERING_SPLIT_PARENT, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1633
1634
1635 hbck = new HBaseFsck(conf, hbfsckExecutorService);
1636 hbck.connect();
1637 hbck.setDisplayFullReport();
1638 hbck.setTimeLag(0);
1639 hbck.setFixSplitParents(true);
1640 hbck.onlineHbck();
1641 assertTrue(hbck.shouldRerun());
1642 hbck.close();
1643
1644 Get get = new Get(hri.getRegionName());
1645 Result result = meta.get(get);
1646 assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
1647 HConstants.SPLITA_QUALIFIER).isEmpty());
1648 assertTrue(result.getColumnCells(HConstants.CATALOG_FAMILY,
1649 HConstants.SPLITB_QUALIFIER).isEmpty());
1650 admin.flush(TableName.META_TABLE_NAME);
1651
1652
1653 doFsck(conf, true);
1654
1655
1656 assertNoErrors(doFsck(conf, false));
1657 assertEquals(ROWKEYS.length, countRows());
1658 } finally {
1659 cleanupTable(table);
1660 IOUtils.closeQuietly(meta);
1661 }
1662 }
1663
1664
1665
1666
1667
1668 @Test (timeout=180000)
1669 public void testValidLingeringSplitParent() throws Exception {
1670 TableName table =
1671 TableName.valueOf("testLingeringSplitParent");
1672 Table meta = null;
1673 try {
1674 setupTable(table);
1675 assertEquals(ROWKEYS.length, countRows());
1676
1677
1678 admin.flush(table);
1679 HRegionLocation location = tbl.getRegionLocation(Bytes.toBytes("B"));
1680
1681 meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
1682 HRegionInfo hri = location.getRegionInfo();
1683
1684
1685 byte[] regionName = location.getRegionInfo().getRegionName();
1686 admin.splitRegion(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
1687 TestEndToEndSplitTransaction.blockUntilRegionSplit(conf, 60000, regionName, true);
1688
1689
1690
1691
1692 HBaseFsck hbck = doFsck(
1693 conf, true, true, false, false, false, true, true, true, false, false, false, null);
1694 assertErrors(hbck, new ERROR_CODE[] {});
1695
1696
1697 Get get = new Get(hri.getRegionName());
1698 Result result = meta.get(get);
1699 assertNotNull(result);
1700 assertNotNull(MetaTableAccessor.getHRegionInfo(result));
1701
1702 assertEquals(ROWKEYS.length, countRows());
1703
1704
1705 assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1);
1706 assertNoErrors(doFsck(conf, false));
1707 } finally {
1708 cleanupTable(table);
1709 IOUtils.closeQuietly(meta);
1710 }
1711 }
1712
1713
1714
1715
1716
1717 @Test(timeout=75000)
1718 public void testSplitDaughtersNotInMeta() throws Exception {
1719 TableName table = TableName.valueOf("testSplitdaughtersNotInMeta");
1720 Table meta = connection.getTable(TableName.META_TABLE_NAME, tableExecutorService);
1721 try {
1722 setupTable(table);
1723 assertEquals(ROWKEYS.length, countRows());
1724
1725
1726 admin.flush(table);
1727 HRegionLocation location = tbl.getRegionLocation(Bytes.toBytes("B"));
1728
1729 HRegionInfo hri = location.getRegionInfo();
1730
1731
1732
1733 admin.enableCatalogJanitor(false);
1734
1735
1736 byte[] regionName = location.getRegionInfo().getRegionName();
1737 admin.splitRegion(location.getRegionInfo().getRegionName(), Bytes.toBytes("BM"));
1738 TestEndToEndSplitTransaction.blockUntilRegionSplit(conf, 60000, regionName, true);
1739
1740 PairOfSameType<HRegionInfo> daughters =
1741 MetaTableAccessor.getDaughterRegions(meta.get(new Get(regionName)));
1742
1743
1744 Map<HRegionInfo, ServerName> hris = tbl.getRegionLocations();
1745 undeployRegion(connection, hris.get(daughters.getFirst()), daughters.getFirst());
1746 undeployRegion(connection, hris.get(daughters.getSecond()), daughters.getSecond());
1747
1748 List<Delete> deletes = new ArrayList<>();
1749 deletes.add(new Delete(daughters.getFirst().getRegionName()));
1750 deletes.add(new Delete(daughters.getSecond().getRegionName()));
1751 meta.delete(deletes);
1752
1753
1754 RegionStates regionStates = TEST_UTIL.getMiniHBaseCluster().getMaster().
1755 getAssignmentManager().getRegionStates();
1756 regionStates.deleteRegion(daughters.getFirst());
1757 regionStates.deleteRegion(daughters.getSecond());
1758
1759 HBaseFsck hbck = doFsck(conf, false);
1760 assertErrors(hbck,
1761 new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1762 ERROR_CODE.HOLE_IN_REGION_CHAIN });
1763
1764
1765 hbck = doFsck(
1766 conf, true, true, false, false, false, false, false, false, false, false, false, null);
1767 assertErrors(hbck,
1768 new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
1769 ERROR_CODE.HOLE_IN_REGION_CHAIN });
1770
1771
1772 Get get = new Get(hri.getRegionName());
1773 Result result = meta.get(get);
1774 assertNotNull(result);
1775 assertNotNull(MetaTableAccessor.getHRegionInfo(result));
1776
1777 assertEquals(ROWKEYS.length, countRows());
1778
1779
1780 assertEquals(tbl.getStartKeys().length, SPLITS.length + 1 + 1);
1781 assertNoErrors(doFsck(conf, false));
1782 } finally {
1783 admin.enableCatalogJanitor(true);
1784 meta.close();
1785 cleanupTable(table);
1786 }
1787 }
1788
1789
1790
1791
1792
1793 @Test(timeout=120000)
1794 public void testMissingFirstRegion() throws Exception {
1795 TableName table = TableName.valueOf("testMissingFirstRegion");
1796 try {
1797 setupTable(table);
1798 assertEquals(ROWKEYS.length, countRows());
1799
1800
1801 admin.disableTable(table);
1802 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""), Bytes.toBytes("A"), true,
1803 true, true);
1804 admin.enableTable(table);
1805
1806 HBaseFsck hbck = doFsck(conf, false);
1807 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY });
1808
1809 doFsck(conf, true);
1810
1811 assertNoErrors(doFsck(conf, false));
1812 } finally {
1813 cleanupTable(table);
1814 }
1815 }
1816
1817
1818
1819
1820
1821 @Test(timeout=120000)
1822 public void testRegionDeployedNotInHdfs() throws Exception {
1823 TableName table =
1824 TableName.valueOf("testSingleRegionDeployedNotInHdfs");
1825 try {
1826 setupTable(table);
1827 admin.flush(table);
1828
1829
1830 deleteRegion(conf, tbl.getTableDescriptor(),
1831 HConstants.EMPTY_START_ROW, Bytes.toBytes("A"), false,
1832 false, true);
1833
1834 HBaseFsck hbck = doFsck(conf, false);
1835 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });
1836
1837 doFsck(conf, true);
1838
1839 assertNoErrors(doFsck(conf, false));
1840 } finally {
1841 cleanupTable(table);
1842 }
1843 }
1844
1845
1846
1847
1848
1849 @Test(timeout=120000)
1850 public void testMissingLastRegion() throws Exception {
1851 TableName table =
1852 TableName.valueOf("testMissingLastRegion");
1853 try {
1854 setupTable(table);
1855 assertEquals(ROWKEYS.length, countRows());
1856
1857
1858 admin.disableTable(table);
1859 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes(""), true,
1860 true, true);
1861 admin.enableTable(table);
1862
1863 HBaseFsck hbck = doFsck(conf, false);
1864 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY });
1865
1866 doFsck(conf, true);
1867
1868 assertNoErrors(doFsck(conf, false));
1869 } finally {
1870 cleanupTable(table);
1871 }
1872 }
1873
1874
1875
1876
1877 @Test (timeout=180000)
1878 public void testFixAssignmentsAndNoHdfsChecking() throws Exception {
1879 TableName table =
1880 TableName.valueOf("testFixAssignmentsAndNoHdfsChecking");
1881 try {
1882 setupTable(table);
1883 assertEquals(ROWKEYS.length, countRows());
1884
1885
1886 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1887 Bytes.toBytes("B"), true, false, false, false, HRegionInfo.DEFAULT_REPLICA_ID);
1888
1889
1890 HBaseFsck hbck = doFsck(conf, false);
1891 assertErrors(hbck, new ERROR_CODE[] {
1892 ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1893
1894
1895 HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
1896 fsck.connect();
1897 fsck.setDisplayFullReport();
1898 fsck.setTimeLag(0);
1899 fsck.setCheckHdfs(false);
1900 fsck.onlineHbck();
1901 assertErrors(fsck, new ERROR_CODE[] {
1902 ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
1903 fsck.close();
1904
1905
1906 fsck = new HBaseFsck(conf, hbfsckExecutorService);
1907 fsck.connect();
1908 fsck.setDisplayFullReport();
1909 fsck.setTimeLag(0);
1910 fsck.setCheckHdfs(false);
1911 fsck.setFixAssignments(true);
1912 fsck.onlineHbck();
1913 assertTrue(fsck.shouldRerun());
1914 fsck.onlineHbck();
1915 assertNoErrors(fsck);
1916
1917 assertEquals(ROWKEYS.length, countRows());
1918
1919 fsck.close();
1920 } finally {
1921 cleanupTable(table);
1922 }
1923 }
1924
1925
1926
1927
1928
1929
1930 @Test (timeout=180000)
1931 public void testFixMetaNotWorkingWithNoHdfsChecking() throws Exception {
1932 TableName table =
1933 TableName.valueOf("testFixMetaNotWorkingWithNoHdfsChecking");
1934 try {
1935 setupTable(table);
1936 assertEquals(ROWKEYS.length, countRows());
1937
1938
1939 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1940 Bytes.toBytes("B"), false, true, false, false, HRegionInfo.DEFAULT_REPLICA_ID);
1941
1942
1943 HBaseFsck hbck = doFsck(conf, false);
1944 assertErrors(hbck,
1945 new ERROR_CODE[] { ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN });
1946
1947
1948 HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
1949 fsck.connect();
1950 fsck.setDisplayFullReport();
1951 fsck.setTimeLag(0);
1952 fsck.setCheckHdfs(false);
1953 fsck.onlineHbck();
1954 assertErrors(fsck,
1955 new ERROR_CODE[] { ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN });
1956 fsck.close();
1957
1958
1959 fsck = new HBaseFsck(conf, hbfsckExecutorService);
1960 fsck.connect();
1961 fsck.setDisplayFullReport();
1962 fsck.setTimeLag(0);
1963 fsck.setCheckHdfs(false);
1964 fsck.setFixAssignments(true);
1965 fsck.setFixMeta(true);
1966 fsck.onlineHbck();
1967 assertFalse(fsck.shouldRerun());
1968 assertErrors(fsck,
1969 new ERROR_CODE[] { ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN });
1970 fsck.close();
1971
1972
1973 fsck = doFsck(conf, true);
1974 assertTrue(fsck.shouldRerun());
1975 fsck = doFsck(conf, true);
1976 assertNoErrors(fsck);
1977 } finally {
1978 cleanupTable(table);
1979 }
1980 }
1981
1982
1983
1984
1985
1986 @Test (timeout=180000)
1987 public void testFixHdfsHolesNotWorkingWithNoHdfsChecking() throws Exception {
1988 TableName table =
1989 TableName.valueOf("testFixHdfsHolesNotWorkingWithNoHdfsChecking");
1990 try {
1991 setupTable(table);
1992 assertEquals(ROWKEYS.length, countRows());
1993
1994
1995 admin.disableTable(table);
1996 deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
1997 Bytes.toBytes("B"), true, true, false, true, HRegionInfo.DEFAULT_REPLICA_ID);
1998 TEST_UTIL.getHBaseAdmin().enableTable(table);
1999
2000 HRegionInfo hriOverlap =
2001 createRegion(tbl.getTableDescriptor(), Bytes.toBytes("A2"), Bytes.toBytes("B"));
2002 TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
2003 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
2004 .waitForAssignment(hriOverlap);
2005 ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
2006 TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
2007
2008 HBaseFsck hbck = doFsck(conf, false);
2009 assertErrors(hbck, new ERROR_CODE[] {
2010 ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
2011 ERROR_CODE.HOLE_IN_REGION_CHAIN});
2012
2013
2014 HBaseFsck fsck = new HBaseFsck(conf, hbfsckExecutorService);
2015 fsck.connect();
2016 fsck.setDisplayFullReport();
2017 fsck.setTimeLag(0);
2018 fsck.setCheckHdfs(false);
2019 fsck.onlineHbck();
2020 assertErrors(fsck, new ERROR_CODE[] {
2021 ERROR_CODE.HOLE_IN_REGION_CHAIN});
2022 fsck.close();
2023
2024
2025 fsck = new HBaseFsck(conf, hbfsckExecutorService);
2026 fsck.connect();
2027 fsck.setDisplayFullReport();
2028 fsck.setTimeLag(0);
2029 fsck.setCheckHdfs(false);
2030 fsck.setFixHdfsHoles(true);
2031 fsck.setFixHdfsOverlaps(true);
2032 fsck.setFixHdfsOrphans(true);
2033 fsck.onlineHbck();
2034 assertFalse(fsck.shouldRerun());
2035 assertErrors(fsck, new ERROR_CODE[] { ERROR_CODE.HOLE_IN_REGION_CHAIN});
2036 fsck.close();
2037 } finally {
2038 if (admin.isTableDisabled(table)) {
2039 admin.enableTable(table);
2040 }
2041 cleanupTable(table);
2042 }
2043 }
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053 Path getFlushedHFile(FileSystem fs, TableName table) throws IOException {
2054 Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
2055 Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
2056 Path famDir = new Path(regionDir, FAM_STR);
2057
2058
2059 while (true) {
2060 FileStatus[] hfFss = fs.listStatus(famDir);
2061 if (hfFss.length == 0) {
2062 continue;
2063 }
2064 for (FileStatus hfs : hfFss) {
2065 if (!hfs.isDirectory()) {
2066 return hfs.getPath();
2067 }
2068 }
2069 }
2070 }
2071
2072
2073
2074
2075 @Test(timeout=180000)
2076 public void testQuarantineCorruptHFile() throws Exception {
2077 TableName table = TableName.valueOf(name.getMethodName());
2078 try {
2079 setupTable(table);
2080 assertEquals(ROWKEYS.length, countRows());
2081 admin.flush(table);
2082
2083 FileSystem fs = FileSystem.get(conf);
2084 Path hfile = getFlushedHFile(fs, table);
2085
2086
2087 admin.disableTable(table);
2088
2089
2090 Path corrupt = new Path(hfile.getParent(), "deadbeef");
2091 TestHFile.truncateFile(fs, hfile, corrupt);
2092 LOG.info("Created corrupted file " + corrupt);
2093 HBaseFsck.debugLsr(conf, FSUtils.getRootDir(conf));
2094
2095
2096 HBaseFsck res = HbckTestingUtil.doHFileQuarantine(conf, table);
2097 assertEquals(res.getRetCode(), 0);
2098 HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
2099 assertEquals(hfcc.getHFilesChecked(), 5);
2100 assertEquals(hfcc.getCorrupted().size(), 1);
2101 assertEquals(hfcc.getFailures().size(), 0);
2102 assertEquals(hfcc.getQuarantined().size(), 1);
2103 assertEquals(hfcc.getMissing().size(), 0);
2104
2105
2106 admin.enableTable(table);
2107 } finally {
2108 cleanupTable(table);
2109 }
2110 }
2111
2112
2113
2114
2115 private void doQuarantineTest(TableName table, HBaseFsck hbck, int check,
2116 int corrupt, int fail, int quar, int missing) throws Exception {
2117 try {
2118 setupTable(table);
2119 assertEquals(ROWKEYS.length, countRows());
2120 admin.flush(table);
2121
2122
2123 admin.disableTable(table);
2124
2125 String[] args = {"-sidelineCorruptHFiles", "-repairHoles", "-ignorePreCheckPermission",
2126 table.getNameAsString()};
2127 HBaseFsck res = hbck.exec(hbfsckExecutorService, args);
2128
2129 HFileCorruptionChecker hfcc = res.getHFilecorruptionChecker();
2130 assertEquals(hfcc.getHFilesChecked(), check);
2131 assertEquals(hfcc.getCorrupted().size(), corrupt);
2132 assertEquals(hfcc.getFailures().size(), fail);
2133 assertEquals(hfcc.getQuarantined().size(), quar);
2134 assertEquals(hfcc.getMissing().size(), missing);
2135
2136
2137 admin.enableTableAsync(table);
2138 while (!admin.isTableEnabled(table)) {
2139 try {
2140 Thread.sleep(250);
2141 } catch (InterruptedException e) {
2142 e.printStackTrace();
2143 fail("Interrupted when trying to enable table " + table);
2144 }
2145 }
2146 } finally {
2147 cleanupTable(table);
2148 }
2149 }
2150
2151
2152
2153
2154
2155 @Test(timeout=180000)
2156 public void testQuarantineMissingHFile() throws Exception {
2157 TableName table = TableName.valueOf(name.getMethodName());
2158
2159
2160 final FileSystem fs = FileSystem.get(conf);
2161 HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
2162 @Override
2163 public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
2164 return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
2165 AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
2166 @Override
2167 protected void checkHFile(Path p) throws IOException {
2168 if (attemptedFirstHFile.compareAndSet(false, true)) {
2169 assertTrue(fs.delete(p, true));
2170 }
2171 super.checkHFile(p);
2172 }
2173 };
2174 }
2175 };
2176 doQuarantineTest(table, hbck, 4, 0, 0, 0, 1);
2177 hbck.close();
2178 }
2179
2180
2181
2182
2183
2184
2185
2186 @Ignore @Test(timeout=180000)
2187 public void testQuarantineMissingFamdir() throws Exception {
2188 TableName table = TableName.valueOf(name.getMethodName());
2189
2190 final FileSystem fs = FileSystem.get(conf);
2191 HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
2192 @Override
2193 public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException {
2194 return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
2195 AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
2196 @Override
2197 protected void checkColFamDir(Path p) throws IOException {
2198 if (attemptedFirstHFile.compareAndSet(false, true)) {
2199 assertTrue(fs.delete(p, true));
2200 }
2201 super.checkColFamDir(p);
2202 }
2203 };
2204 }
2205 };
2206 doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
2207 hbck.close();
2208 }
2209
2210
2211
2212
2213
2214 @Test(timeout=180000)
2215 public void testQuarantineMissingRegionDir() throws Exception {
2216 TableName table = TableName.valueOf(name.getMethodName());
2217
2218 final FileSystem fs = FileSystem.get(conf);
2219 HBaseFsck hbck = new HBaseFsck(conf, hbfsckExecutorService) {
2220 @Override
2221 public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles)
2222 throws IOException {
2223 return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) {
2224 AtomicBoolean attemptedFirstHFile = new AtomicBoolean(false);
2225 @Override
2226 protected void checkRegionDir(Path p) throws IOException {
2227 if (attemptedFirstHFile.compareAndSet(false, true)) {
2228 assertTrue(fs.delete(p, true));
2229 }
2230 super.checkRegionDir(p);
2231 }
2232 };
2233 }
2234 };
2235 doQuarantineTest(table, hbck, 3, 0, 0, 0, 1);
2236 hbck.close();
2237 }
2238
2239
2240
2241
2242 @Test (timeout=180000)
2243 public void testLingeringReferenceFile() throws Exception {
2244 TableName table =
2245 TableName.valueOf("testLingeringReferenceFile");
2246 try {
2247 setupTable(table);
2248 assertEquals(ROWKEYS.length, countRows());
2249
2250
2251 FileSystem fs = FileSystem.get(conf);
2252 Path tableDir= FSUtils.getTableDir(FSUtils.getRootDir(conf), table);
2253 Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
2254 Path famDir = new Path(regionDir, FAM_STR);
2255 Path fakeReferenceFile = new Path(famDir, "fbce357483ceea.12144538");
2256 fs.create(fakeReferenceFile);
2257
2258 HBaseFsck hbck = doFsck(conf, false);
2259 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.LINGERING_REFERENCE_HFILE });
2260
2261 doFsck(conf, true);
2262
2263 assertNoErrors(doFsck(conf, false));
2264 } finally {
2265 cleanupTable(table);
2266 }
2267 }
2268
2269
2270
2271
2272 @Test (timeout=180000)
2273 public void testMissingRegionInfoQualifier() throws Exception {
2274 Connection connection = ConnectionFactory.createConnection(conf);
2275 TableName table = TableName.valueOf("testMissingRegionInfoQualifier");
2276 try {
2277 setupTable(table);
2278
2279
2280 final List<Delete> deletes = new LinkedList<Delete>();
2281 Table meta = connection.getTable(TableName.META_TABLE_NAME, hbfsckExecutorService);
2282 MetaScanner.metaScan(connection, new MetaScanner.MetaScannerVisitor() {
2283
2284 @Override
2285 public boolean processRow(Result rowResult) throws IOException {
2286 HRegionInfo hri = MetaTableAccessor.getHRegionInfo(rowResult);
2287 if (hri != null && !hri.getTable().isSystemTable()) {
2288 Delete delete = new Delete(rowResult.getRow());
2289 delete.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
2290 deletes.add(delete);
2291 }
2292 return true;
2293 }
2294
2295 @Override
2296 public void close() throws IOException {
2297 }
2298 });
2299 meta.delete(deletes);
2300
2301
2302 meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
2303 HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER, Bytes.toBytes("node1:60020")));
2304 meta.put(new Put(Bytes.toBytes(table + ",,1361911384013.810e28f59a57da91c66")).add(
2305 HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER, Bytes.toBytes(1362150791183L)));
2306 meta.close();
2307
2308 HBaseFsck hbck = doFsck(conf, false);
2309 assertTrue(hbck.getErrors().getErrorList().contains(ERROR_CODE.EMPTY_META_CELL));
2310
2311
2312 hbck = doFsck(conf, true);
2313
2314
2315 assertFalse(hbck.getErrors().getErrorList().contains(ERROR_CODE.EMPTY_META_CELL));
2316 } finally {
2317 cleanupTable(table);
2318 }
2319 connection.close();
2320 }
2321
2322
2323
2324
2325
2326 @Test (timeout=180000)
2327 public void testErrorReporter() throws Exception {
2328 try {
2329 MockErrorReporter.calledCount = 0;
2330 doFsck(conf, false);
2331 assertEquals(MockErrorReporter.calledCount, 0);
2332
2333 conf.set("hbasefsck.errorreporter", MockErrorReporter.class.getName());
2334 doFsck(conf, false);
2335 assertTrue(MockErrorReporter.calledCount > 20);
2336 } finally {
2337 conf.set("hbasefsck.errorreporter",
2338 PrintingErrorReporter.class.getName());
2339 MockErrorReporter.calledCount = 0;
2340 }
2341 }
2342
2343 static class MockErrorReporter implements ErrorReporter {
2344 static int calledCount = 0;
2345
2346 @Override
2347 public void clear() {
2348 calledCount++;
2349 }
2350
2351 @Override
2352 public void report(String message) {
2353 calledCount++;
2354 }
2355
2356 @Override
2357 public void reportError(String message) {
2358 calledCount++;
2359 }
2360
2361 @Override
2362 public void reportError(ERROR_CODE errorCode, String message) {
2363 calledCount++;
2364 }
2365
2366 @Override
2367 public void reportError(ERROR_CODE errorCode, String message, TableInfo table) {
2368 calledCount++;
2369 }
2370
2371 @Override
2372 public void reportError(ERROR_CODE errorCode,
2373 String message, TableInfo table, HbckInfo info) {
2374 calledCount++;
2375 }
2376
2377 @Override
2378 public void reportError(ERROR_CODE errorCode, String message,
2379 TableInfo table, HbckInfo info1, HbckInfo info2) {
2380 calledCount++;
2381 }
2382
2383 @Override
2384 public int summarize() {
2385 return ++calledCount;
2386 }
2387
2388 @Override
2389 public void detail(String details) {
2390 calledCount++;
2391 }
2392
2393 @Override
2394 public ArrayList<ERROR_CODE> getErrorList() {
2395 calledCount++;
2396 return new ArrayList<ERROR_CODE>();
2397 }
2398
2399 @Override
2400 public void progress() {
2401 calledCount++;
2402 }
2403
2404 @Override
2405 public void print(String message) {
2406 calledCount++;
2407 }
2408
2409 @Override
2410 public void resetErrors() {
2411 calledCount++;
2412 }
2413
2414 @Override
2415 public boolean tableHasErrors(TableInfo table) {
2416 calledCount++;
2417 return false;
2418 }
2419 }
2420
2421 @Test(timeout=180000)
2422 public void testCheckTableLocks() throws Exception {
2423 IncrementingEnvironmentEdge edge = new IncrementingEnvironmentEdge(0);
2424 EnvironmentEdgeManager.injectEdge(edge);
2425
2426 HBaseFsck hbck = doFsck(conf, false);
2427 assertNoErrors(hbck);
2428
2429 ServerName mockName = ServerName.valueOf("localhost", 60000, 1);
2430 final TableName tableName = TableName.valueOf("foo");
2431
2432
2433 final TableLockManager tableLockManager =
2434 TableLockManager.createTableLockManager(conf, TEST_UTIL.getZooKeeperWatcher(), mockName);
2435 TableLock writeLock = tableLockManager.writeLock(tableName, "testCheckTableLocks");
2436 writeLock.acquire();
2437 hbck = doFsck(conf, false);
2438 assertNoErrors(hbck);
2439
2440 edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
2441 TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS));
2442
2443 hbck = doFsck(conf, false);
2444 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK});
2445
2446 final CountDownLatch latch = new CountDownLatch(1);
2447 new Thread() {
2448 @Override
2449 public void run() {
2450 TableLock readLock = tableLockManager.writeLock(tableName, "testCheckTableLocks");
2451 try {
2452 latch.countDown();
2453 readLock.acquire();
2454 } catch (IOException ex) {
2455 fail();
2456 } catch (IllegalStateException ex) {
2457 return;
2458 }
2459 fail("should not have come here");
2460 };
2461 }.start();
2462
2463 latch.await();
2464 Threads.sleep(300);
2465
2466 hbck = doFsck(conf, false);
2467 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK});
2468
2469 edge.incrementTime(conf.getLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT,
2470 TableLockManager.DEFAULT_TABLE_LOCK_EXPIRE_TIMEOUT_MS));
2471
2472 hbck = doFsck(conf, false);
2473 assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.EXPIRED_TABLE_LOCK, ERROR_CODE.EXPIRED_TABLE_LOCK});
2474
2475 conf.setLong(TableLockManager.TABLE_LOCK_EXPIRE_TIMEOUT, 1);
2476
2477 Threads.sleep(10);
2478 hbck = doFsck(conf, true);
2479
2480 hbck = doFsck(conf, false);
2481 assertNoErrors(hbck);
2482
2483
2484 writeLock = tableLockManager.writeLock(tableName, "should acquire without blocking");
2485 writeLock.acquire();
2486 writeLock.release();
2487 tableLockManager.tableDeleted(tableName);
2488 }
2489
2490
2491
2492
2493 @Test
2494 public void testOrphanedTableZNode() throws Exception {
2495 TableName table = TableName.valueOf("testOrphanedZKTableEntry");
2496
2497 try {
2498 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getTableStateManager()
2499 .setTableState(table, ZooKeeperProtos.Table.State.ENABLING);
2500
2501 try {
2502 setupTable(table);
2503 Assert.fail(
2504 "Create table should fail when its ZNode has already existed with ENABLING state.");
2505 } catch(TableExistsException t) {
2506
2507 }
2508
2509 try {
2510 cleanupTable(table);
2511 } catch (IOException e) {
2512
2513
2514 }
2515
2516 HBaseFsck hbck = doFsck(conf, false);
2517 assertTrue(hbck.getErrors().getErrorList().contains(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY));
2518
2519
2520 hbck = doFsck(conf, true);
2521
2522
2523 hbck = doFsck(conf, false);
2524 assertFalse(hbck.getErrors().getErrorList().contains(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY));
2525
2526 setupTable(table);
2527 } finally {
2528
2529
2530 try {
2531 cleanupTable(table);
2532 } catch (IOException e) {
2533
2534
2535 }
2536 }
2537 }
2538
2539 @Test (timeout=180000)
2540 public void testMetaOffline() throws Exception {
2541
2542 HBaseFsck hbck = doFsck(conf, false);
2543 assertNoErrors(hbck);
2544 deleteMetaRegion(conf, true, false, false);
2545 hbck = doFsck(conf, false);
2546
2547
2548 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
2549 hbck = doFsck(conf, true);
2550 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_META_REGION, ERROR_CODE.UNKNOWN });
2551 hbck = doFsck(conf, false);
2552 assertNoErrors(hbck);
2553 }
2554
2555 private void deleteMetaRegion(Configuration conf, boolean unassign, boolean hdfs,
2556 boolean regionInfoOnly) throws IOException, InterruptedException {
2557 HRegionLocation metaLocation = connection.getRegionLocator(TableName.META_TABLE_NAME)
2558 .getRegionLocation(HConstants.EMPTY_START_ROW);
2559 ServerName hsa = metaLocation.getServerName();
2560 HRegionInfo hri = metaLocation.getRegionInfo();
2561 if (unassign) {
2562 LOG.info("Undeploying meta region " + hri + " from server " + hsa);
2563 try (Connection unmanagedConnection = ConnectionFactory.createConnection(conf)) {
2564 undeployRegion(unmanagedConnection, hsa, hri);
2565 }
2566 }
2567
2568 if (regionInfoOnly) {
2569 LOG.info("deleting hdfs .regioninfo data: " + hri.toString() + hsa.toString());
2570 Path rootDir = FSUtils.getRootDir(conf);
2571 FileSystem fs = rootDir.getFileSystem(conf);
2572 Path p = new Path(rootDir + "/" + TableName.META_TABLE_NAME.getNameAsString(),
2573 hri.getEncodedName());
2574 Path hriPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
2575 fs.delete(hriPath, true);
2576 }
2577
2578 if (hdfs) {
2579 LOG.info("deleting hdfs data: " + hri.toString() + hsa.toString());
2580 Path rootDir = FSUtils.getRootDir(conf);
2581 FileSystem fs = rootDir.getFileSystem(conf);
2582 Path p = new Path(rootDir + "/" + TableName.META_TABLE_NAME.getNameAsString(),
2583 hri.getEncodedName());
2584 HBaseFsck.debugLsr(conf, p);
2585 boolean success = fs.delete(p, true);
2586 LOG.info("Deleted " + p + " sucessfully? " + success);
2587 HBaseFsck.debugLsr(conf, p);
2588 }
2589 }
2590
2591 @Test (timeout=180000)
2592 public void testTableWithNoRegions() throws Exception {
2593
2594
2595 TableName table =
2596 TableName.valueOf(name.getMethodName());
2597 try {
2598
2599 HTableDescriptor desc = new HTableDescriptor(table);
2600 HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM));
2601 desc.addFamily(hcd);
2602 createTable(TEST_UTIL, desc, null);
2603 tbl = (HTable) connection.getTable(table, tableExecutorService);
2604
2605
2606 deleteRegion(conf, tbl.getTableDescriptor(), HConstants.EMPTY_START_ROW,
2607 HConstants.EMPTY_END_ROW, false, false, true);
2608
2609 HBaseFsck hbck = doFsck(conf, false);
2610 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS });
2611
2612 doFsck(conf, true);
2613
2614
2615 doFsck(conf, true);
2616
2617
2618 assertNoErrors(doFsck(conf, false));
2619 } finally {
2620 cleanupTable(table);
2621 }
2622
2623 }
2624
2625 @Test (timeout=180000)
2626 public void testHbckAfterRegionMerge() throws Exception {
2627 TableName table = TableName.valueOf("testMergeRegionFilesInHdfs");
2628 Table meta = null;
2629 try {
2630
2631 TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
2632 setupTable(table);
2633 assertEquals(ROWKEYS.length, countRows());
2634
2635
2636 admin.flush(table);
2637 HRegionInfo region1 = tbl.getRegionLocation(Bytes.toBytes("A")).getRegionInfo();
2638 HRegionInfo region2 = tbl.getRegionLocation(Bytes.toBytes("B")).getRegionInfo();
2639
2640 int regionCountBeforeMerge = tbl.getRegionLocations().size();
2641
2642 assertNotEquals(region1, region2);
2643
2644
2645 admin.mergeRegions(region1.getEncodedNameAsBytes(),
2646 region2.getEncodedNameAsBytes(), false);
2647
2648
2649 long timeout = System.currentTimeMillis() + 30 * 1000;
2650 while (true) {
2651 if (tbl.getRegionLocations().size() < regionCountBeforeMerge) {
2652 break;
2653 } else if (System.currentTimeMillis() > timeout) {
2654 fail("Time out waiting on region " + region1.getEncodedName()
2655 + " and " + region2.getEncodedName() + " be merged");
2656 }
2657 Thread.sleep(10);
2658 }
2659
2660 assertEquals(ROWKEYS.length, countRows());
2661
2662 HBaseFsck hbck = doFsck(conf, false);
2663 assertNoErrors(hbck);
2664
2665 } finally {
2666 TEST_UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true);
2667 cleanupTable(table);
2668 IOUtils.closeQuietly(meta);
2669 }
2670 }
2671
2672 @Test (timeout = 180000)
2673 public void testRegionBoundariesCheck() throws Exception {
2674 HBaseFsck hbck = doFsck(conf, false);
2675 assertNoErrors(hbck);
2676 try {
2677 hbck.checkRegionBoundaries();
2678 } catch (IllegalArgumentException e) {
2679 if (e.getMessage().endsWith("not a valid DFS filename.")) {
2680 fail("Table directory path is not valid." + e.getMessage());
2681 }
2682 }
2683 }
2684
2685 @org.junit.Rule
2686 public TestName name = new TestName();
2687
2688 @Test (timeout=180000)
2689 public void testReadOnlyProperty() throws Exception {
2690 HBaseFsck hbck = doFsck(conf, false);
2691 Assert.assertEquals("shouldIgnorePreCheckPermission", true,
2692 hbck.shouldIgnorePreCheckPermission());
2693
2694 hbck = doFsck(conf, true);
2695 Assert.assertEquals("shouldIgnorePreCheckPermission", false,
2696 hbck.shouldIgnorePreCheckPermission());
2697
2698 hbck = doFsck(conf, true);
2699 hbck.setIgnorePreCheckPermission(true);
2700 Assert.assertEquals("shouldIgnorePreCheckPermission", true,
2701 hbck.shouldIgnorePreCheckPermission());
2702 }
2703
2704 @Test (timeout=180000)
2705 public void testCleanUpDaughtersNotInMetaAfterFailedSplit() throws Exception {
2706 TableName table = TableName.valueOf("testCleanUpDaughtersNotInMetaAfterFailedSplit");
2707 MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
2708 try {
2709 HTableDescriptor desc = new HTableDescriptor(table);
2710 desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f")));
2711 createTable(TEST_UTIL, desc, null);
2712 tbl = new HTable(cluster.getConfiguration(), desc.getTableName());
2713 for (int i = 0; i < 5; i++) {
2714 Put p1 = new Put(("r" + i).getBytes());
2715 p1.add(Bytes.toBytes("f"), "q1".getBytes(), "v".getBytes());
2716 tbl.put(p1);
2717 }
2718 admin.flush(desc.getTableName());
2719 List<HRegion> regions = cluster.getRegions(desc.getTableName());
2720 int serverWith = cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName());
2721 HRegionServer regionServer = cluster.getRegionServer(serverWith);
2722 cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName());
2723 SplitTransactionImpl st = new SplitTransactionImpl(regions.get(0), Bytes.toBytes("r3"));
2724 st.prepare();
2725 st.stepsBeforePONR(regionServer, regionServer, false);
2726 AssignmentManager am = cluster.getMaster().getAssignmentManager();
2727 Map<String, RegionState> regionsInTransition = am.getRegionStates().getRegionsInTransition();
2728 for (RegionState state : regionsInTransition.values()) {
2729 am.regionOffline(state.getRegion());
2730 }
2731 ZKAssign.deleteNodeFailSilent(regionServer.getZooKeeper(), regions.get(0).getRegionInfo());
2732 Map<HRegionInfo, ServerName> regionsMap = new HashMap<HRegionInfo, ServerName>();
2733 regionsMap.put(regions.get(0).getRegionInfo(), regionServer.getServerName());
2734 am.assign(regionsMap);
2735 am.waitForAssignment(regions.get(0).getRegionInfo());
2736 HBaseFsck hbck = doFsck(conf, false);
2737 assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
2738 ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
2739
2740 assertEquals(0, hbck.getOverlapGroups(table).size());
2741
2742
2743 assertErrors(
2744 doFsck(
2745 conf, false, true, false, false, false, false, false, false, false, false, false, null),
2746 new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
2747 ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
2748
2749
2750 assertNoErrors(doFsck(conf, false));
2751 assertEquals(5, countRows());
2752 } finally {
2753 if (tbl != null) {
2754 tbl.close();
2755 tbl = null;
2756 }
2757 cleanupTable(table);
2758 }
2759 }
2760
2761
2762 public static class MasterSyncObserver extends BaseMasterObserver {
2763 volatile CountDownLatch tableCreationLatch = null;
2764 volatile CountDownLatch tableDeletionLatch = null;
2765
2766 @Override
2767 public void postCreateTableHandler(final ObserverContext<MasterCoprocessorEnvironment> ctx,
2768 HTableDescriptor desc, HRegionInfo[] regions) throws IOException {
2769
2770 if (tableCreationLatch != null) {
2771 tableCreationLatch.countDown();
2772 }
2773 }
2774
2775 @Override
2776 public void postDeleteTableHandler(final ObserverContext<MasterCoprocessorEnvironment> ctx,
2777 TableName tableName)
2778 throws IOException {
2779
2780 if (tableDeletionLatch != null) {
2781 tableDeletionLatch.countDown();
2782 }
2783 }
2784 }
2785
2786 public static void createTable(HBaseTestingUtility testUtil, HTableDescriptor htd,
2787 byte [][] splitKeys) throws Exception {
2788
2789
2790 MasterSyncObserver observer = (MasterSyncObserver)testUtil.getHBaseCluster().getMaster()
2791 .getMasterCoprocessorHost().findCoprocessor(MasterSyncObserver.class.getName());
2792 observer.tableCreationLatch = new CountDownLatch(1);
2793 if (splitKeys != null) {
2794 admin.createTable(htd, splitKeys);
2795 } else {
2796 admin.createTable(htd);
2797 }
2798 observer.tableCreationLatch.await();
2799 observer.tableCreationLatch = null;
2800 testUtil.waitUntilAllRegionsAssigned(htd.getTableName());
2801 }
2802
2803 public static void deleteTable(HBaseTestingUtility testUtil, TableName tableName)
2804 throws Exception {
2805
2806
2807 MasterSyncObserver observer = (MasterSyncObserver)testUtil.getHBaseCluster().getMaster()
2808 .getMasterCoprocessorHost().findCoprocessor(MasterSyncObserver.class.getName());
2809 observer.tableDeletionLatch = new CountDownLatch(1);
2810 try {
2811 admin.disableTable(tableName);
2812 } catch (Exception e) {
2813 LOG.debug("Table: " + tableName + " already disabled, so just deleting it.");
2814 }
2815 admin.deleteTable(tableName);
2816 observer.tableDeletionLatch.await();
2817 observer.tableDeletionLatch = null;
2818 }
2819 }