/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.NotImplementedException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableDescriptors;
import org.apache.hadoop.hbase.TableInfoMissingException;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.primitives.Ints;

/**
 * Implementation of {@link TableDescriptors} that reads descriptors from the
 * passed filesystem.  It expects descriptors to be in a file in the
 * {@link #TABLEINFO_DIR} subdir of the table's directory in the filesystem.
 * Can be read-only (i.e. it does not modify the filesystem) or read-write.
 *
 * <p>Also has utility for keeping up the table descriptor's tableinfo file.
 * The table schema file is kept in the {@link #TABLEINFO_DIR} subdir
 * of the table directory in the filesystem.
 * It has a {@link #TABLEINFO_FILE_PREFIX} and then a suffix that is the
 * edit sequenceid: e.g. <code>.tableinfo.0000000003</code>.  This sequenceid
 * is always increasing and starts at zero.  The table schema file with the
 * highest sequenceid holds the most recent schema edit.  Usually there is only
 * one file, the most recent, but there may be short periods where there is more
 * than one.  Old files are eventually cleaned up.  The presumption is that there
 * will not be lots of concurrent clients making table schema edits; if there are,
 * the below needs a bit of reworking and perhaps some supporting api in hdfs.
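 *
 * <p>For example, the layout for a table whose schema has been edited three
 * times might look like the sketch below (directory names come from the
 * constants in this class; the table directory's location depends on the
 * deployment):
 * <pre>
 * &lt;table-dir&gt;/.tmp                              (staging dir for new tableinfo files)
 * &lt;table-dir&gt;/.tabledesc/.tableinfo.0000000003  (current schema file)
 * </pre>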
 */
@InterfaceAudience.Private
public class FSTableDescriptors implements TableDescriptors {
  private static final Log LOG = LogFactory.getLog(FSTableDescriptors.class);
  private final FileSystem fs;
  private final Path rootdir;
  private final boolean fsreadonly;
  private volatile boolean usecache;
  private volatile boolean fsvisited;

  @VisibleForTesting long cachehits = 0;
  @VisibleForTesting long invocations = 0;

  /** The file name prefix used to store HTD in HDFS */
  static final String TABLEINFO_FILE_PREFIX = ".tableinfo";
  static final String TABLEINFO_DIR = ".tabledesc";
  static final String TMP_DIR = ".tmp";

  // This cache does not age out the old stuff.  The thinking is that the
  // amount of data kept here is so small that there is no need for an
  // occasional purge.  TODO.
  private final Map<TableName, HTableDescriptor> cache =
    new ConcurrentHashMap<TableName, HTableDescriptor>();

  /**
   * Table descriptor for the <code>hbase:meta</code> catalog table
   */
  private final HTableDescriptor metaTableDescriptor;

  /**
   * Construct a FSTableDescriptors instance using the hbase root dir of the given
   * conf and the filesystem where that root dir lives.
   * This instance can do write operations (is not read only).
   */
  public FSTableDescriptors(final Configuration conf) throws IOException {
    this(conf, FSUtils.getCurrentFileSystem(conf), FSUtils.getRootDir(conf));
  }

  public FSTableDescriptors(final Configuration conf, final FileSystem fs, final Path rootdir)
      throws IOException {
    this(conf, fs, rootdir, false, true);
  }

  /**
   * @param fsreadonly True if we are read-only when it comes to filesystem
   * operations; i.e. on remove, we do not delete from the filesystem.
   */
  public FSTableDescriptors(final Configuration conf, final FileSystem fs,
    final Path rootdir, final boolean fsreadonly, final boolean usecache) throws IOException {
    this.fs = fs;
    this.rootdir = rootdir;
    this.fsreadonly = fsreadonly;
    this.usecache = usecache;
    this.metaTableDescriptor = HTableDescriptor.metaTableDescriptor(conf);
  }

  public void setCacheOn() throws IOException {
    this.cache.clear();
    this.usecache = true;
  }

  public void setCacheOff() throws IOException {
    this.usecache = false;
    this.cache.clear();
  }

  @VisibleForTesting
  public boolean isUsecache() {
    return this.usecache;
  }

  /**
   * Get the current table descriptor for the given table, or null if none exists.
   *
   * If the cache is enabled, the cached descriptor is returned when present;
   * otherwise the descriptor is read from the filesystem and, on success, cached.
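   *
   * <p>A minimal usage sketch (assuming a {@code Configuration conf}; the table
   * name is illustrative):
   * <pre>
   * FSTableDescriptors fstd = new FSTableDescriptors(conf);
   * HTableDescriptor htd = fstd.get(TableName.valueOf("example_table"));
   * </pre>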
   */
  @Override
  public HTableDescriptor get(final TableName tablename)
  throws IOException {
    invocations++;
    if (TableName.META_TABLE_NAME.equals(tablename)) {
      cachehits++;
      return metaTableDescriptor;
    }
    // hbase:meta is already handled. If someone tries to get the descriptor for
    // .logs, .oldlogs or .corrupt, throw an exception.
    if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tablename.getNameAsString())) {
      throw new IOException("No descriptor found for non-table: " + tablename);
    }

    if (usecache) {
      // Look in cache of descriptors.
      HTableDescriptor cachedtdm = this.cache.get(tablename);
      if (cachedtdm != null) {
        cachehits++;
        return cachedtdm;
      }
    }
    HTableDescriptor tdmt = null;
    try {
      tdmt = getTableDescriptorFromFs(fs, rootdir, tablename, !fsreadonly);
    } catch (NullPointerException e) {
      LOG.debug("Exception during readTableDescriptor. Current table name = "
          + tablename, e);
    } catch (IOException ioe) {
      LOG.debug("Exception during readTableDescriptor. Current table name = "
          + tablename, ioe);
    }
    // last HTD written wins
    if (usecache && tdmt != null) {
      this.cache.put(tablename, tdmt);
    }

    return tdmt;
  }

  /**
   * Returns a map from table name to table descriptor for all tables.
   */
  @Override
  public Map<String, HTableDescriptor> getAll()
  throws IOException {
    Map<String, HTableDescriptor> htds = new TreeMap<String, HTableDescriptor>();

    if (fsvisited && usecache) {
      for (Map.Entry<TableName, HTableDescriptor> entry: this.cache.entrySet()) {
        htds.put(entry.getKey().toString(), entry.getValue());
      }
      // add hbase:meta to the response
      htds.put(metaTableDescriptor.getTableName().getNameAsString(), metaTableDescriptor);
    } else {
      LOG.debug("Fetching table descriptors from the filesystem.");
      boolean allvisited = true;
      for (Path d : FSUtils.getTableDirs(fs, rootdir)) {
        HTableDescriptor htd = null;
        try {
          htd = get(FSUtils.getTableName(d));
        } catch (FileNotFoundException fnfe) {
          // failure to retrieve one HTD shouldn't stop us getting the rest
          LOG.warn("Trouble retrieving htd", fnfe);
        }
        if (htd == null) {
          allvisited = false;
          continue;
        }
        htds.put(htd.getTableName().getNameAsString(), htd);
      }
      // Remember whether every table dir yielded a descriptor so subsequent
      // calls can be served from the cache.
      fsvisited = allvisited;
    }
    return htds;
  }

  /**
   * Returns a map from table name to table descriptor for all tables in the
   * given namespace.
   */
  @Override
  public Map<String, HTableDescriptor> getByNamespace(String name)
  throws IOException {
    Map<String, HTableDescriptor> htds = new TreeMap<String, HTableDescriptor>();
    List<Path> tableDirs =
        FSUtils.getLocalTableDirs(fs, FSUtils.getNamespaceDir(rootdir, name));
    for (Path d: tableDirs) {
      HTableDescriptor htd = null;
      try {
        htd = get(FSUtils.getTableName(d));
      } catch (FileNotFoundException fnfe) {
        // failure to retrieve one HTD shouldn't stop us getting the rest
        LOG.warn("Trouble retrieving htd", fnfe);
      }
      if (htd == null) continue;
      htds.put(FSUtils.getTableName(d).getNameAsString(), htd);
    }
    return htds;
  }

  /**
   * Adds (or updates) the table descriptor to the FileSystem
   * and updates the local cache with it.
   */
  @Override
  public void add(HTableDescriptor htd) throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot add a table descriptor - in read only mode");
    }
    if (TableName.META_TABLE_NAME.equals(htd.getTableName())) {
      throw new NotImplementedException("Cannot add a table descriptor for hbase:meta");
    }
    if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(htd.getTableName().getNameAsString())) {
      throw new NotImplementedException(
        "Cannot add a table descriptor for a reserved subdirectory name: "
          + htd.getTableName().getNameAsString());
    }
    updateTableDescriptor(htd);
  }

  /**
   * Removes the table descriptor from the local cache and returns it.
   * If not in read only mode, it also deletes the entire table directory(!)
   * from the FileSystem.
   */
  @Override
  public HTableDescriptor remove(final TableName tablename)
  throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot remove a table descriptor - in read only mode");
    }
    Path tabledir = getTableDir(tablename);
    if (this.fs.exists(tabledir)) {
      if (!this.fs.delete(tabledir, true)) {
        throw new IOException("Failed delete of " + tabledir.toString());
      }
    }
    return this.cache.remove(tablename);
  }

  /**
   * Checks if a current table info file exists for the given table
   *
   * @param tableName name of table
   * @return true if exists
   * @throws IOException
   */
  public boolean isTableInfoExists(TableName tableName) throws IOException {
    return getTableInfoPath(tableName) != null;
  }

  /**
   * Find the most current table info file for the given table in the hbase root directory.
   * @return The file status of the current table info file or null if it does not exist
   */
  private FileStatus getTableInfoPath(final TableName tableName) throws IOException {
    Path tableDir = getTableDir(tableName);
    return getTableInfoPath(tableDir);
  }

  private FileStatus getTableInfoPath(Path tableDir)
  throws IOException {
    return getTableInfoPath(fs, tableDir, !fsreadonly);
  }

  /**
   * Find the most current table info file for the table located in the given table directory.
   *
   * Looks within the {@link #TABLEINFO_DIR} subdirectory of the given directory for any table
   * info files and takes the 'current' one - meaning the one with the highest sequence number,
   * or the one with no sequence number at all if that is all that exists (for backward
   * compatibility with files from before sequence numbers were introduced).
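   * For example, given <code>.tableinfo.0000000002</code> and
   * <code>.tableinfo.0000000003</code>, the latter is taken to be current.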
   *
   * @return The file status of the current table info file or null if it does not exist
   * @throws IOException
   */
  public static FileStatus getTableInfoPath(FileSystem fs, Path tableDir)
  throws IOException {
    return getTableInfoPath(fs, tableDir, false);
  }

  /**
   * Find the most current table info file for the table in the given table directory.
   *
   * Looks within the {@link #TABLEINFO_DIR} subdirectory of the given directory for any table
   * info files and takes the 'current' one - meaning the one with the highest sequence number
   * if present or no sequence number at all if none exist (for backward compatibility from
   * before there were sequence numbers).
   * If there are multiple table info files found and removeOldFiles is true it also deletes
   * the older files.
   *
   * @return The file status of the current table info file or null if none exist
   * @throws IOException
   */
  private static FileStatus getTableInfoPath(FileSystem fs, Path tableDir, boolean removeOldFiles)
  throws IOException {
    Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
    return getCurrentTableInfoStatus(fs, tableInfoDir, removeOldFiles);
  }

  /**
   * Find the most current table info file in the given directory
   *
   * Looks within the given directory for any table info files and takes the
   * 'current' one - meaning the one with the highest sequence number if present
   * or no sequence number at all if none exist (for backward compatibility from
   * before there were sequence numbers).
   * If there are multiple possible files found and we're not in read only mode,
   * it also deletes the older files.
   *
   * @return The file status of the current table info file or null if it does not exist
   * @throws IOException
   */
  // only visible for FSTableDescriptorMigrationToSubdir, can be removed with that
  static FileStatus getCurrentTableInfoStatus(FileSystem fs, Path dir, boolean removeOldFiles)
  throws IOException {
    FileStatus [] status = FSUtils.listStatus(fs, dir, TABLEINFO_PATHFILTER);
    if (status == null || status.length < 1) return null;
    FileStatus mostCurrent = null;
    for (FileStatus file : status) {
      if (mostCurrent == null || TABLEINFO_FILESTATUS_COMPARATOR.compare(file, mostCurrent) < 0) {
        mostCurrent = file;
      }
    }
    if (removeOldFiles && status.length > 1) {
      // Clean away old versions
      for (FileStatus file : status) {
        Path path = file.getPath();
        if (file != mostCurrent) {
          if (!fs.delete(file.getPath(), false)) {
            LOG.warn("Failed cleanup of " + path);
          } else {
            LOG.debug("Cleaned up old tableinfo file " + path);
          }
        }
      }
    }
    return mostCurrent;
  }

  /**
   * Compares {@link FileStatus} instances by their path, in reverse order, so
   * the tableinfo file with the highest sequence number sorts first.
   */
  @VisibleForTesting
  static final Comparator<FileStatus> TABLEINFO_FILESTATUS_COMPARATOR =
  new Comparator<FileStatus>() {
    @Override
    public int compare(FileStatus left, FileStatus right) {
      return right.compareTo(left);
    }};

  /**
   * Return the table directory in HDFS
   */
  @VisibleForTesting Path getTableDir(final TableName tableName) {
    return FSUtils.getTableDir(rootdir, tableName);
  }

  private static final PathFilter TABLEINFO_PATHFILTER = new PathFilter() {
    @Override
    public boolean accept(Path p) {
      // Accept any file that starts with TABLEINFO_FILE_PREFIX
      return p.getName().startsWith(TABLEINFO_FILE_PREFIX);
    }};

  /**
   * Width of the sequenceid that is a suffix on a tableinfo file.
   */
  @VisibleForTesting static final int WIDTH_OF_SEQUENCE_ID = 10;

  /*
   * @param number Number to use as suffix.
   * @return Zero-prefixed decimal version of the passed number (takes the
   * absolute value in case the number is negative).
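   * e.g. formatTableInfoSequenceId(3) returns "0000000003".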
   */
  private static String formatTableInfoSequenceId(final int number) {
    byte [] b = new byte[WIDTH_OF_SEQUENCE_ID];
    int d = Math.abs(number);
    for (int i = b.length - 1; i >= 0; i--) {
      b[i] = (byte)((d % 10) + '0');
      d /= 10;
    }
    return Bytes.toString(b);
  }

  /**
   * Regex to eat up the sequenceid suffix on a .tableinfo file.
   * A regex is used because we may encounter old-style .tableinfo files that
   * have no sequenceid on the end.
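   * Matches both <code>.tableinfo</code> and e.g. <code>.tableinfo.0000000003</code>.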
   */
  private static final Pattern TABLEINFO_FILE_REGEX =
    Pattern.compile(TABLEINFO_FILE_PREFIX + "(\\.([0-9]{" + WIDTH_OF_SEQUENCE_ID + "}))?$");

  /**
   * @param p Path to a <code>.tableinfo</code> file.
   * @return The current editid or 0 if none found.
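   * e.g. 3 for <code>.tableinfo.0000000003</code>, 0 for plain <code>.tableinfo</code>.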
   */
  @VisibleForTesting static int getTableInfoSequenceId(final Path p) {
    if (p == null) return 0;
    Matcher m = TABLEINFO_FILE_REGEX.matcher(p.getName());
    if (!m.matches()) throw new IllegalArgumentException(p.toString());
    String suffix = m.group(2);
    if (suffix == null || suffix.length() <= 0) return 0;
    return Integer.parseInt(suffix);
  }

  /**
   * @param sequenceid
   * @return Name of tableinfo file.
   */
  @VisibleForTesting static String getTableInfoFileName(final int sequenceid) {
    return TABLEINFO_FILE_PREFIX + "." + formatTableInfoSequenceId(sequenceid);
  }

  /**
   * Returns the latest table descriptor for the given table directly from the file system
   * if it exists, bypassing the local cache.
   * @throws TableInfoMissingException if there is no descriptor
   */
  public static HTableDescriptor getTableDescriptorFromFs(FileSystem fs,
    Path hbaseRootDir, TableName tableName) throws IOException {
    Path tableDir = FSUtils.getTableDir(hbaseRootDir, tableName);
    return getTableDescriptorFromFs(fs, tableDir);
  }

  /**
   * Returns the latest table descriptor for the given table directly from the file system
   * if it exists, bypassing the local cache.
   * @throws TableInfoMissingException if there is no descriptor
   */
  public static HTableDescriptor getTableDescriptorFromFs(FileSystem fs,
    Path hbaseRootDir, TableName tableName, boolean rewritePb) throws IOException {
    Path tableDir = FSUtils.getTableDir(hbaseRootDir, tableName);
    return getTableDescriptorFromFs(fs, tableDir, rewritePb);
  }

  /**
   * Returns the latest table descriptor for the table located at the given directory
   * directly from the file system if it exists.
   * @throws TableInfoMissingException if there is no descriptor
   */
  public static HTableDescriptor getTableDescriptorFromFs(FileSystem fs, Path tableDir)
    throws IOException {
    return getTableDescriptorFromFs(fs, tableDir, false);
  }

  /**
   * Returns the latest table descriptor for the table located at the given directory
   * directly from the file system if it exists.
   * @throws TableInfoMissingException if there is no descriptor
   */
  public static HTableDescriptor getTableDescriptorFromFs(FileSystem fs, Path tableDir,
    boolean rewritePb)
  throws IOException {
    FileStatus status = getTableInfoPath(fs, tableDir, false);
    if (status == null) {
      throw new TableInfoMissingException("No table descriptor file under " + tableDir);
    }
    return readTableDescriptor(fs, status, rewritePb);
  }

  private static HTableDescriptor readTableDescriptor(FileSystem fs, FileStatus status,
      boolean rewritePb) throws IOException {
    int len = Ints.checkedCast(status.getLen());
    byte [] content = new byte[len];
    FSDataInputStream fsDataInputStream = fs.open(status.getPath());
    try {
      fsDataInputStream.readFully(content);
    } finally {
      fsDataInputStream.close();
    }
    HTableDescriptor htd = null;
    try {
      htd = HTableDescriptor.parseFrom(content);
    } catch (DeserializationException e) {
      // we have an old HTableDescriptor here
      try {
        HTableDescriptor ohtd = HTableDescriptor.parseFrom(content);
        LOG.warn("Found old table descriptor, converting to new format for table " +
          ohtd.getTableName());
        htd = new HTableDescriptor(ohtd);
        if (rewritePb) rewriteTableDescriptor(fs, status, htd);
      } catch (DeserializationException e1) {
        throw new IOException("content=" + Bytes.toShort(content), e1);
      }
    }
    if (rewritePb && !ProtobufUtil.isPBMagicPrefix(content)) {
      // Convert the file over to be pb before leaving here.
      rewriteTableDescriptor(fs, status, htd);
    }
    return htd;
  }

  private static void rewriteTableDescriptor(final FileSystem fs, final FileStatus status,
    final HTableDescriptor td)
  throws IOException {
    Path tableInfoDir = status.getPath().getParent();
    Path tableDir = tableInfoDir.getParent();
    writeTableDescriptor(fs, td, tableDir, status);
  }

  /**
   * Update table descriptor on the file system
   * @throws IOException Thrown if failed update.
   * @throws NotImplementedException if in read only mode
   */
  @VisibleForTesting Path updateTableDescriptor(HTableDescriptor htd)
  throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot update a table descriptor - in read only mode");
    }
    Path tableDir = getTableDir(htd.getTableName());
    Path p = writeTableDescriptor(fs, htd, tableDir, getTableInfoPath(tableDir));
    if (p == null) throw new IOException("Failed update");
    LOG.info("Updated tableinfo=" + p);
    if (usecache) {
      this.cache.put(htd.getTableName(), htd);
    }
    return p;
  }

  /**
   * Deletes all the table descriptor files from the file system.
   * Used in unit tests only.
   * @throws NotImplementedException if in read only mode
   */
  public void deleteTableDescriptorIfExists(TableName tableName) throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot delete a table descriptor - in read only mode");
    }

    Path tableDir = getTableDir(tableName);
    Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
    deleteTableDescriptorFiles(fs, tableInfoDir, Integer.MAX_VALUE);
  }

  /**
   * Deletes files matching the table info file pattern within the given directory
   * whose sequenceId is at most the given max sequenceId.
   */
  private static void deleteTableDescriptorFiles(FileSystem fs, Path dir, int maxSequenceId)
  throws IOException {
    FileStatus [] status = FSUtils.listStatus(fs, dir, TABLEINFO_PATHFILTER);
    // listStatus returns null if the directory does not exist
    if (status == null) return;
    for (FileStatus file : status) {
      Path path = file.getPath();
      int sequenceId = getTableInfoSequenceId(path);
      if (sequenceId <= maxSequenceId) {
        boolean success = FSUtils.delete(fs, path, false);
        if (success) {
          LOG.debug("Deleted table descriptor at " + path);
        } else {
          LOG.error("Failed to delete descriptor at " + path);
        }
      }
    }
  }

  /**
   * Attempts to write a new table descriptor to the given table's directory.
   * It first writes the file to the .tmp dir, then uses an atomic rename to move it into place.
   * It begins at currentSequenceId + 1 and tries up to 10 times to find a new sequence number
   * not already in use.
   * Removes the current descriptor file if passed in.
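   *
   * For example, if the current file is <code>.tabledesc/.tableinfo.0000000003</code>,
   * the new descriptor is first written to <code>.tmp/.tableinfo.0000000004</code> and
   * then renamed into <code>.tabledesc/.tableinfo.0000000004</code>.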
   *
   * @return Descriptor file or null if we failed write.
   */
  private static Path writeTableDescriptor(final FileSystem fs,
    final HTableDescriptor htd, final Path tableDir,
    final FileStatus currentDescriptorFile)
  throws IOException {
    // Get temporary dir into which we'll first write a file to avoid half-written file phenomenon.
    // This directory is never removed to avoid removing it out from under a concurrent writer.
    Path tmpTableDir = new Path(tableDir, TMP_DIR);
    Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);

    // What is the current sequenceid?  We read it from the current file.  After
    // we read it, another thread could come in and compete with us writing out
    // the next version of the file.  The retries below help some, but it is hard
    // to make guarantees in the face of concurrent schema edits.
    int currentSequenceId = currentDescriptorFile == null ? 0 :
      getTableInfoSequenceId(currentDescriptorFile.getPath());
    int newSequenceId = currentSequenceId;

    // Put an arbitrary upper bound on how often we retry
    int retries = 10;
    int retrymax = currentSequenceId + retries;
    Path tableInfoDirPath = null;
    do {
      newSequenceId += 1;
      String filename = getTableInfoFileName(newSequenceId);
      Path tempPath = new Path(tmpTableDir, filename);
      if (fs.exists(tempPath)) {
        LOG.debug(tempPath + " exists; retrying up to " + retries + " times");
        continue;
      }
      tableInfoDirPath = new Path(tableInfoDir, filename);
      try {
        writeHTD(fs, tempPath, htd);
        fs.mkdirs(tableInfoDirPath.getParent());
        if (!fs.rename(tempPath, tableInfoDirPath)) {
          throw new IOException("Failed rename of " + tempPath + " to " + tableInfoDirPath);
        }
        LOG.debug("Wrote descriptor into: " + tableInfoDirPath);
      } catch (IOException ioe) {
        // Presume clash of names or something; go around again.
        LOG.debug("Failed write and/or rename; retrying", ioe);
        if (!FSUtils.deleteDirectory(fs, tempPath)) {
          LOG.warn("Failed cleanup of " + tempPath);
        }
        tableInfoDirPath = null;
        continue;
      }
      break;
    } while (newSequenceId < retrymax);
    if (tableInfoDirPath != null) {
      // if we succeeded, remove old table info files.
      deleteTableDescriptorFiles(fs, tableInfoDir, newSequenceId - 1);
    }
    return tableInfoDirPath;
  }

  private static void writeHTD(final FileSystem fs, final Path p, final HTableDescriptor htd)
  throws IOException {
    FSDataOutputStream out = fs.create(p, false);
    try {
      // We used to write this file out as a serialized HTD Writable followed by two '\n's and then
      // the toString version of HTD.  Now we just write out the pb serialization.
      out.write(htd.toByteArray());
    } finally {
      out.close();
    }
  }

  /**
   * Create a new HTableDescriptor in HDFS.  Happens when we are creating a table.
   * Used by tests.
   * @return True if we successfully created the file.
   */
  public boolean createTableDescriptor(HTableDescriptor htd) throws IOException {
    return createTableDescriptor(htd, false);
  }

  /**
   * Create a new HTableDescriptor in HDFS.  Happens when we are creating a table.
   * If forceCreation is true, the file is overwritten even if a previous table
   * descriptor is present.
   *
   * @return True if we successfully created the file.
   */
  public boolean createTableDescriptor(HTableDescriptor htd, boolean forceCreation)
  throws IOException {
    Path tableDir = getTableDir(htd.getTableName());
    return createTableDescriptorForTableDirectory(tableDir, htd, forceCreation);
  }

  /**
   * Create a new HTableDescriptor in HDFS in the specified table directory. Happens when we
   * create a new table or snapshot a table.
   * @param tableDir table directory under which we should write the file
   * @param htd description of the table to write
   * @param forceCreation if <tt>true</tt>, then even if a previous table descriptor is present
   *          it will be overwritten
   * @return <tt>true</tt> if we successfully created the file, <tt>false</tt> if the file
   *         already exists and we weren't forcing the descriptor creation.
   * @throws IOException if a filesystem error occurs
   */
  public boolean createTableDescriptorForTableDirectory(Path tableDir,
      HTableDescriptor htd, boolean forceCreation) throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot create a table descriptor - in read only mode");
    }
    FileStatus status = getTableInfoPath(fs, tableDir);
    if (status != null) {
      LOG.debug("Current tableInfoPath = " + status.getPath());
      if (!forceCreation) {
        if (fs.exists(status.getPath()) && status.getLen() > 0) {
          if (readTableDescriptor(fs, status, false).equals(htd)) {
            LOG.debug("TableInfo already exists, skipping creation");
            return false;
          }
        }
      }
    }
    Path p = writeTableDescriptor(fs, htd, tableDir, status);
    return p != null;
  }

}