View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase;
20  
21  import java.io.IOException;
22  import java.security.SecureRandom;
23  import java.util.Random;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.commons.math.random.RandomData;
28  import org.apache.commons.math.random.RandomDataImpl;
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.fs.FileSystem;
31  import org.apache.hadoop.fs.Path;
32  import org.apache.hadoop.hbase.classification.InterfaceAudience;
33  import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
34  import org.apache.hadoop.hbase.io.crypto.Encryption;
35  import org.apache.hadoop.hbase.io.crypto.KeyProviderForTesting;
36  import org.apache.hadoop.hbase.io.crypto.aes.AES;
37  import org.apache.hadoop.hbase.io.hfile.AbstractHFileWriter;
38  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
39  import org.apache.hadoop.hbase.io.hfile.HFile;
40  import org.apache.hadoop.hbase.io.hfile.HFileContext;
41  import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
42  import org.apache.hadoop.hbase.io.hfile.HFileScanner;
43  import org.apache.hadoop.hbase.util.Bytes;
44  
45  /**
46   * This class runs performance benchmarks for {@link HFile}.
47   */
48  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
49  public class HFilePerformanceEvaluation {
/** Length in bytes of every generated cell value; the read benchmarks assert this size. */
private static final int ROW_LENGTH = 10;
/** Number of rows passed to every benchmark as its row count. */
private static final int ROW_COUNT = 1000000;
/** Block size, in bytes, given to the HFileContextBuilder when the test file is written. */
private static final int RFILE_BLOCKSIZE = 8 * 1024;
/**
 * Collects one line per benchmark result; logged once all benchmarks have run.
 * A StringBuffer (synchronized appends) is used because the read benchmarks append from
 * Runnables handed to PerformanceEvaluationCommons.concurrentReads, which presumably runs
 * them on more than one thread -- TODO confirm against that helper.
 */
private static final StringBuffer testSummary = new StringBuffer();

// Disable verbose INFO logging from org.apache.hadoop.io.compress.CodecPool.
// Static initializers run in textual order, so this block is kept above LOG to have the
// properties in place before the logger is requested.
static {
  System.setProperty("org.apache.commons.logging.Log",
    "org.apache.commons.logging.impl.SimpleLog");
  System.setProperty("org.apache.commons.logging.simplelog.log.org.apache.hadoop.io.compress.CodecPool",
    "WARN");
}

private static final Log LOG =
  LogFactory.getLog(HFilePerformanceEvaluation.class.getName());
65  
66    static byte [] format(final int i) {
67      String v = Integer.toString(i);
68      return Bytes.toBytes("0000000000".substring(v.length()) + v);
69    }
70  
71    static ImmutableBytesWritable format(final int i, ImmutableBytesWritable w) {
72      w.set(format(i));
73      return w;
74    }
75  
76    static Cell createCell(final int i) {
77      return createCell(i, HConstants.EMPTY_BYTE_ARRAY);
78    }
79  
80    /**
81     * HFile is Cell-based. It used to be byte arrays.  Doing this test, pass Cells. All Cells
82     * intentionally have same coordinates in all fields but row.
83     * @param i Integer to format as a row Key.
84     * @param value Value to use
85     * @return Created Cell.
86     */
87    static Cell createCell(final int i, final byte [] value) {
88      return createCell(format(i), value);
89    }
90  
91    static Cell createCell(final byte [] keyRow) {
92      return CellUtil.createCell(keyRow);
93    }
94  
95    static Cell createCell(final byte [] keyRow, final byte [] value) {
96      return CellUtil.createCell(keyRow, value);
97    }
98  
99    /**
100    * Add any supported codec or cipher to test the HFile read/write performance. 
101    * Specify "none" to disable codec or cipher or both.  
102    * @throws Exception
103    */
104   private void runBenchmarks() throws Exception {
105     final Configuration conf = new Configuration();
106     final FileSystem fs = FileSystem.get(conf);
107     final Path mf = fs.makeQualified(new Path("performanceevaluation.mapfile"));
108     
109     // codec=none cipher=none
110     runWriteBenchmark(conf, fs, mf, "none", "none");
111     runReadBenchmark(conf, fs, mf, "none", "none");
112     
113     // codec=gz cipher=none
114     runWriteBenchmark(conf, fs, mf, "gz", "none");
115     runReadBenchmark(conf, fs, mf, "gz", "none");
116 
117     // Add configuration for AES cipher
118     final Configuration aesconf = new Configuration();
119     aesconf.set(HConstants.CRYPTO_KEYPROVIDER_CONF_KEY, KeyProviderForTesting.class.getName());
120     aesconf.set(HConstants.CRYPTO_MASTERKEY_NAME_CONF_KEY, "hbase");
121     aesconf.setInt("hfile.format.version", 3);
122     final FileSystem aesfs = FileSystem.get(aesconf);
123     final Path aesmf = aesfs.makeQualified(new Path("performanceevaluation.aes.mapfile"));
124 
125     // codec=none cipher=aes
126     runWriteBenchmark(aesconf, aesfs, aesmf, "none", "aes");
127     runReadBenchmark(aesconf, aesfs, aesmf, "none", "aes");
128 
129     // codec=gz cipher=aes
130     runWriteBenchmark(aesconf, aesfs, aesmf, "gz", "aes");
131     runReadBenchmark(aesconf, aesfs, aesmf, "gz", "aes");
132 
133     // cleanup test files
134     if (fs.exists(mf)) {
135       fs.delete(mf, true);
136     }
137     if (aesfs.exists(aesmf)) {
138       aesfs.delete(aesmf, true);
139     }
140     
141     // Print Result Summary
142     LOG.info("\n***************\n" + "Result Summary" + "\n***************\n");
143     LOG.info(testSummary.toString());
144 
145   }
146 
147   /**
148    * Write a test HFile with the given codec & cipher
149    * @param conf
150    * @param fs
151    * @param mf
152    * @param codec "none", "lzo", "gz", "snappy"
153    * @param cipher "none", "aes"
154    * @throws Exception
155    */
156   private void runWriteBenchmark(Configuration conf, FileSystem fs, Path mf, String codec,
157       String cipher) throws Exception {
158     if (fs.exists(mf)) {
159       fs.delete(mf, true);
160     }
161 
162     runBenchmark(new SequentialWriteBenchmark(conf, fs, mf, ROW_COUNT, codec, cipher),
163         ROW_COUNT, codec, cipher);
164 
165   }
166 
167   /**
168    * Run all the read benchmarks for the test HFile 
169    * @param conf
170    * @param fs
171    * @param mf
172    * @param codec "none", "lzo", "gz", "snappy"
173    * @param cipher "none", "aes"
174    */
175   private void runReadBenchmark(final Configuration conf, final FileSystem fs, final Path mf,
176       final String codec, final String cipher) {
177     PerformanceEvaluationCommons.concurrentReads(new Runnable() {
178       @Override
179       public void run() {
180         try {
181           runBenchmark(new UniformRandomSmallScan(conf, fs, mf, ROW_COUNT),
182             ROW_COUNT, codec, cipher);
183         } catch (Exception e) {
184           testSummary.append("UniformRandomSmallScan failed " + e.getMessage());
185           e.printStackTrace();
186         }
187       }
188     });
189     
190     PerformanceEvaluationCommons.concurrentReads(new Runnable() {
191       @Override
192       public void run() {
193         try {
194           runBenchmark(new UniformRandomReadBenchmark(conf, fs, mf, ROW_COUNT),
195               ROW_COUNT, codec, cipher);
196         } catch (Exception e) {
197           testSummary.append("UniformRandomReadBenchmark failed " + e.getMessage());
198           e.printStackTrace();
199         }
200       }
201     });
202     
203     PerformanceEvaluationCommons.concurrentReads(new Runnable() {
204       @Override
205       public void run() {
206         try {
207           runBenchmark(new GaussianRandomReadBenchmark(conf, fs, mf, ROW_COUNT),
208               ROW_COUNT, codec, cipher);
209         } catch (Exception e) {
210           testSummary.append("GaussianRandomReadBenchmark failed " + e.getMessage());
211           e.printStackTrace();
212         }
213       }
214     });
215     
216     PerformanceEvaluationCommons.concurrentReads(new Runnable() {
217       @Override
218       public void run() {
219         try {
220           runBenchmark(new SequentialReadBenchmark(conf, fs, mf, ROW_COUNT),
221               ROW_COUNT, codec, cipher);
222         } catch (Exception e) {
223           testSummary.append("SequentialReadBenchmark failed " + e.getMessage());
224           e.printStackTrace();
225         }
226       }
227     });    
228 
229   }
230   
231   protected void runBenchmark(RowOrientedBenchmark benchmark, int rowCount,
232       String codec, String cipher) throws Exception {
233     LOG.info("Running " + benchmark.getClass().getSimpleName() + " with codec[" + 
234         codec + "] " + "cipher[" + cipher + "] for " + rowCount + " rows.");
235     
236     long elapsedTime = benchmark.run();
237     
238     LOG.info("Running " + benchmark.getClass().getSimpleName() + " with codec[" + 
239         codec + "] " + "cipher[" + cipher + "] for " + rowCount + " rows took " + 
240         elapsedTime + "ms.");
241     
242     // Store results to print summary at the end
243     testSummary.append("Running ").append(benchmark.getClass().getSimpleName())
244         .append(" with codec[").append(codec).append("] cipher[").append(cipher)
245         .append("] for ").append(rowCount).append(" rows took ").append(elapsedTime)
246         .append("ms.").append("\n");
247   }
248 
249   static abstract class RowOrientedBenchmark {
250 
251     protected final Configuration conf;
252     protected final FileSystem fs;
253     protected final Path mf;
254     protected final int totalRows;
255     protected String codec = "none";
256     protected String cipher = "none";
257 
258     public RowOrientedBenchmark(Configuration conf, FileSystem fs, Path mf,
259         int totalRows, String codec, String cipher) {
260       this.conf = conf;
261       this.fs = fs;
262       this.mf = mf;
263       this.totalRows = totalRows;
264       this.codec = codec;
265       this.cipher = cipher;
266     }
267 
268     public RowOrientedBenchmark(Configuration conf, FileSystem fs, Path mf,
269         int totalRows) {
270       this.conf = conf;
271       this.fs = fs;
272       this.mf = mf;
273       this.totalRows = totalRows;
274     }
275 
276     void setUp() throws Exception {
277       // do nothing
278     }
279 
280     abstract void doRow(int i) throws Exception;
281 
282     protected int getReportingPeriod() {
283       return this.totalRows / 10;
284     }
285 
286     void tearDown() throws Exception {
287       // do nothing
288     }
289 
290     /**
291      * Run benchmark
292      * @return elapsed time.
293      * @throws Exception
294      */
295     long run() throws Exception {
296       long elapsedTime;
297       setUp();
298       long startTime = System.currentTimeMillis();
299       try {
300         for (int i = 0; i < totalRows; i++) {
301           if (i > 0 && i % getReportingPeriod() == 0) {
302             LOG.info("Processed " + i + " rows.");
303           }
304           doRow(i);
305         }
306         elapsedTime = System.currentTimeMillis() - startTime;
307       } finally {
308         tearDown();
309       }
310       return elapsedTime;
311     }
312 
313   }
314 
315   static class SequentialWriteBenchmark extends RowOrientedBenchmark {
316     protected HFile.Writer writer;
317     private Random random = new Random();
318     private byte[] bytes = new byte[ROW_LENGTH];
319 
320     public SequentialWriteBenchmark(Configuration conf, FileSystem fs, Path mf,
321         int totalRows, String codec, String cipher) {
322       super(conf, fs, mf, totalRows, codec, cipher);
323     }
324 
325     @Override
326     void setUp() throws Exception {
327 
328       HFileContextBuilder builder = new HFileContextBuilder()
329           .withCompression(AbstractHFileWriter.compressionByName(codec))
330           .withBlockSize(RFILE_BLOCKSIZE);
331       
332       if (cipher == "aes") {
333         byte[] cipherKey = new byte[AES.KEY_LENGTH];
334         new SecureRandom().nextBytes(cipherKey);
335         builder.withEncryptionContext(Encryption.newContext(conf)
336             .setCipher(Encryption.getCipher(conf, cipher))
337             .setKey(cipherKey));
338       } else if (!"none".equals(cipher)) {
339         throw new IOException("Cipher " + cipher + " not supported.");
340       }
341       
342       HFileContext hFileContext = builder.build();
343 
344       writer = HFile.getWriterFactoryNoCache(conf)
345           .withPath(fs, mf)
346           .withFileContext(hFileContext)
347           .withComparator(new KeyValue.RawBytesComparator())
348           .create();
349     }
350     
351     @Override
352     void doRow(int i) throws Exception {
353       writer.append(createCell(i, generateValue()));
354     }
355 
356     private byte[] generateValue() {
357       random.nextBytes(bytes);
358       return bytes;
359     }
360 
361     @Override
362     protected int getReportingPeriod() {
363       return this.totalRows; // don't report progress
364     }
365 
366     @Override
367     void tearDown() throws Exception {
368       writer.close();
369     }
370 
371   }
372 
373   static abstract class ReadBenchmark extends RowOrientedBenchmark {
374 
375     protected HFile.Reader reader;
376 
377     public ReadBenchmark(Configuration conf, FileSystem fs, Path mf,
378         int totalRows) {
379       super(conf, fs, mf, totalRows);
380     }
381 
382     @Override
383     void setUp() throws Exception {
384       reader = HFile.createReader(this.fs, this.mf, new CacheConfig(this.conf), this.conf);
385       this.reader.loadFileInfo();
386     }
387 
388     @Override
389     void tearDown() throws Exception {
390       reader.close();
391     }
392 
393   }
394 
395   static class SequentialReadBenchmark extends ReadBenchmark {
396     private HFileScanner scanner;
397 
398     public SequentialReadBenchmark(Configuration conf, FileSystem fs,
399       Path mf, int totalRows) {
400       super(conf, fs, mf, totalRows);
401     }
402 
403     @Override
404     void setUp() throws Exception {
405       super.setUp();
406       this.scanner = this.reader.getScanner(false, false);
407       this.scanner.seekTo();
408     }
409 
410     @Override
411     void doRow(int i) throws Exception {
412       if (this.scanner.next()) {
413         // TODO: Fix. Make Scanner do Cells.
414         Cell c = this.scanner.getKeyValue();
415         PerformanceEvaluationCommons.assertKey(format(i + 1), c);
416         PerformanceEvaluationCommons.assertValueSize(c.getValueLength(), ROW_LENGTH);
417       }
418     }
419 
420     @Override
421     protected int getReportingPeriod() {
422       return this.totalRows; // don't report progress
423     }
424 
425   }
426 
427   static class UniformRandomReadBenchmark extends ReadBenchmark {
428 
429     private Random random = new Random();
430 
431     public UniformRandomReadBenchmark(Configuration conf, FileSystem fs,
432         Path mf, int totalRows) {
433       super(conf, fs, mf, totalRows);
434     }
435 
436     @Override
437     void doRow(int i) throws Exception {
438       HFileScanner scanner = this.reader.getScanner(false, true);
439       byte [] b = getRandomRow();
440       if (scanner.seekTo(createCell(b)) < 0) {
441         LOG.info("Not able to seekTo " + new String(b));
442         return;
443       }
444       // TODO: Fix scanner so it does Cells
445       Cell c = scanner.getKeyValue();
446       PerformanceEvaluationCommons.assertKey(b, c);
447       PerformanceEvaluationCommons.assertValueSize(c.getValueLength(), ROW_LENGTH);
448     }
449 
450     private byte [] getRandomRow() {
451       return format(random.nextInt(totalRows));
452     }
453   }
454 
455   static class UniformRandomSmallScan extends ReadBenchmark {
456     private Random random = new Random();
457 
458     public UniformRandomSmallScan(Configuration conf, FileSystem fs,
459         Path mf, int totalRows) {
460       super(conf, fs, mf, totalRows/10);
461     }
462 
463     @Override
464     void doRow(int i) throws Exception {
465       HFileScanner scanner = this.reader.getScanner(false, false);
466       byte [] b = getRandomRow();
467       // System.out.println("Random row: " + new String(b));
468       Cell c = createCell(b);
469       if (scanner.seekTo(c) != 0) {
470         LOG.info("Nonexistent row: " + new String(b));
471         return;
472       }
473       // TODO: HFileScanner doesn't do Cells yet. Temporary fix.
474       c = scanner.getKeyValue();
475       // System.out.println("Found row: " +
476       //  new String(c.getRowArray(), c.getRowOffset(), c.getRowLength()));
477       PerformanceEvaluationCommons.assertKey(b, c);
478       for (int ii = 0; ii < 30; ii++) {
479         if (!scanner.next()) {
480           LOG.info("NOTHING FOLLOWS");
481           return;
482         }
483         c = scanner.getKeyValue();
484         PerformanceEvaluationCommons.assertValueSize(c.getValueLength(), ROW_LENGTH);
485       }
486     }
487 
488     private byte [] getRandomRow() {
489       return format(random.nextInt(totalRows));
490     }
491   }
492 
493   static class GaussianRandomReadBenchmark extends ReadBenchmark {
494 
495     private RandomData randomData = new RandomDataImpl();
496 
497     public GaussianRandomReadBenchmark(Configuration conf, FileSystem fs,
498         Path mf, int totalRows) {
499       super(conf, fs, mf, totalRows);
500     }
501 
502     @Override
503     void doRow(int i) throws Exception {
504       HFileScanner scanner = this.reader.getScanner(false, true);
505       byte[] gaussianRandomRowBytes = getGaussianRandomRowBytes();
506       scanner.seekTo(createCell(gaussianRandomRowBytes));
507       for (int ii = 0; ii < 30; ii++) {
508         if (!scanner.next()) {
509           LOG.info("NOTHING FOLLOWS");
510           return;
511         }
512         // TODO: Fix. Make scanner do Cells.
513         scanner.getKeyValue();
514       }
515     }
516 
517     private byte [] getGaussianRandomRowBytes() {
518       int r = (int) randomData.nextGaussian((double)totalRows / 2.0,
519           (double)totalRows / 10.0);
520       // make sure r falls into [0,totalRows)
521       return format(Math.min(totalRows, Math.max(r,0)));
522     }
523   }
524 
525   /**
526    * @param args
527    * @throws Exception
528    * @throws IOException
529    */
530   public static void main(String[] args) throws Exception {
531     new HFilePerformanceEvaluation().runBenchmarks();
532   }
533 }