001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.net.ftp.parser; 019import java.text.ParseException; 020import java.util.List; 021import java.util.ListIterator; 022 023import org.apache.commons.net.ftp.FTPClientConfig; 024import org.apache.commons.net.ftp.FTPFile; 025 026/** 027 * Implementation FTPFileEntryParser and FTPFileListParser for standard 028 * Unix Systems. 029 * 030 * This class is based on the logic of Daniel Savarese's 031 * DefaultFTPListParser, but adapted to use regular expressions and to fit the 032 * new FTPFileEntryParser interface. 033 * @see org.apache.commons.net.ftp.FTPFileEntryParser FTPFileEntryParser (for usage instructions) 034 */ 035public class UnixFTPEntryParser extends ConfigurableFTPFileEntryParserImpl 036{ 037 038 static final String DEFAULT_DATE_FORMAT 039 = "MMM d yyyy"; //Nov 9 2001 040 041 static final String DEFAULT_RECENT_DATE_FORMAT 042 = "MMM d HH:mm"; //Nov 9 20:06 043 044 static final String NUMERIC_DATE_FORMAT 045 = "yyyy-MM-dd HH:mm"; //2001-11-09 20:06 046 047 // Suffixes used in Japanese listings after the numeric values 048 private static final String JA_MONTH = "\u6708"; 049 private static final String JA_DAY = "\u65e5"; 050 private static final String JA_YEAR = "\u5e74"; 051 052 private static final String DEFAULT_DATE_FORMAT_JA 053 = "M'" + JA_MONTH + "' d'" + JA_DAY + "' yyyy'" + JA_YEAR + "'"; //6月 3日 2003年 054 055 private static final String DEFAULT_RECENT_DATE_FORMAT_JA 056 = "M'" + JA_MONTH + "' d'" + JA_DAY + "' HH:mm"; //8月 17日 20:10 057 058 /** 059 * Some Linux distributions are now shipping an FTP server which formats 060 * file listing dates in an all-numeric format: 061 * <code>"yyyy-MM-dd HH:mm</code>. 062 * This is a very welcome development, and hopefully it will soon become 063 * the standard. However, since it is so new, for now, and possibly 064 * forever, we merely accomodate it, but do not make it the default. 065 * <p> 066 * For now end users may specify this format only via 067 * <code>UnixFTPEntryParser(FTPClientConfig)</code>. 068 * Steve Cohen - 2005-04-17 069 */ 070 public static final FTPClientConfig NUMERIC_DATE_CONFIG = 071 new FTPClientConfig( 072 FTPClientConfig.SYST_UNIX, 073 NUMERIC_DATE_FORMAT, 074 null); 075 076 /** 077 * this is the regular expression used by this parser. 078 * 079 * Permissions: 080 * r the file is readable 081 * w the file is writable 082 * x the file is executable 083 * - the indicated permission is not granted 084 * L mandatory locking occurs during access (the set-group-ID bit is 085 * on and the group execution bit is off) 086 * s the set-user-ID or set-group-ID bit is on, and the corresponding 087 * user or group execution bit is also on 088 * S undefined bit-state (the set-user-ID bit is on and the user 089 * execution bit is off) 090 * t the 1000 (octal) bit, or sticky bit, is on [see chmod(1)], and 091 * execution is on 092 * T the 1000 bit is turned on, and execution is off (undefined bit- 093 * state) 094 * e z/OS external link bit 095 * Final letter may be appended: 096 * + file has extended security attributes (e.g. ACL) 097 * Note: local listings on MacOSX also use '@'; 098 * this is not allowed for here as does not appear to be shown by FTP servers 099 * {@code @} file has extended attributes 100 */ 101 private static final String REGEX = 102 "([bcdelfmpSs-])" // file type 103 +"(((r|-)(w|-)([xsStTL-]))((r|-)(w|-)([xsStTL-]))((r|-)(w|-)([xsStTL-])))\\+?" // permissions 104 105 + "\\s*" // separator TODO why allow it to be omitted?? 106 107 + "(\\d+)" // link count 108 109 + "\\s+" // separator 110 111 + "(?:(\\S+(?:\\s\\S+)*?)\\s+)?" // owner name (optional spaces) 112 + "(?:(\\S+(?:\\s\\S+)*)\\s+)?" // group name (optional spaces) 113 + "(\\d+(?:,\\s*\\d+)?)" // size or n,m 114 115 + "\\s+" // separator 116 117 /* 118 * numeric or standard format date: 119 * yyyy-mm-dd (expecting hh:mm to follow) 120 * MMM [d]d 121 * [d]d MMM 122 * N.B. use non-space for MMM to allow for languages such as German which use 123 * diacritics (e.g. umlaut) in some abbreviations. 124 * Japanese uses numeric day and month with suffixes to distinguish them 125 * [d]dXX [d]dZZ 126 */ 127 + "("+ 128 "(?:\\d+[-/]\\d+[-/]\\d+)" + // yyyy-mm-dd 129 "|(?:\\S{3}\\s+\\d{1,2})" + // MMM [d]d 130 "|(?:\\d{1,2}\\s+\\S{3})" + // [d]d MMM 131 "|(?:\\d{1,2}" + JA_MONTH + "\\s+\\d{1,2}" + JA_DAY + ")"+ 132 ")" 133 134 + "\\s+" // separator 135 136 /* 137 year (for non-recent standard format) - yyyy 138 or time (for numeric or recent standard format) [h]h:mm 139 or Japanese year - yyyyXX 140 */ 141 + "((?:\\d+(?::\\d+)?)|(?:\\d{4}" + JA_YEAR + "))" // (20) 142 143 + "\\s" // separator 144 145 + "(.*)"; // the rest (21) 146 147 148 // if true, leading spaces are trimmed from file names 149 // this was the case for the original implementation 150 final boolean trimLeadingSpaces; // package protected for access from test code 151 152 /** 153 * The default constructor for a UnixFTPEntryParser object. 154 * 155 * @throws IllegalArgumentException 156 * Thrown if the regular expression is unparseable. Should not be seen 157 * under normal conditions. It it is seen, this is a sign that 158 * <code>REGEX</code> is not a valid regular expression. 159 */ 160 public UnixFTPEntryParser() 161 { 162 this(null); 163 } 164 165 /** 166 * This constructor allows the creation of a UnixFTPEntryParser object with 167 * something other than the default configuration. 168 * 169 * @param config The {@link FTPClientConfig configuration} object used to 170 * configure this parser. 171 * @throws IllegalArgumentException 172 * Thrown if the regular expression is unparseable. Should not be seen 173 * under normal conditions. It it is seen, this is a sign that 174 * <code>REGEX</code> is not a valid regular expression. 175 * @since 1.4 176 */ 177 public UnixFTPEntryParser(final FTPClientConfig config) 178 { 179 this(config, false); 180 } 181 182 /** 183 * This constructor allows the creation of a UnixFTPEntryParser object with 184 * something other than the default configuration. 185 * 186 * @param config The {@link FTPClientConfig configuration} object used to 187 * configure this parser. 188 * @param trimLeadingSpaces if {@code true}, trim leading spaces from file names 189 * @throws IllegalArgumentException 190 * Thrown if the regular expression is unparseable. Should not be seen 191 * under normal conditions. It it is seen, this is a sign that 192 * <code>REGEX</code> is not a valid regular expression. 193 * @since 3.4 194 */ 195 public UnixFTPEntryParser(final FTPClientConfig config, final boolean trimLeadingSpaces) 196 { 197 super(REGEX); 198 configure(config); 199 this.trimLeadingSpaces = trimLeadingSpaces; 200 } 201 202 /** 203 * Preparse the list to discard "total nnn" lines 204 */ 205 @Override 206 public List<String> preParse(final List<String> original) { 207 final ListIterator<String> iter = original.listIterator(); 208 while (iter.hasNext()) { 209 final String entry = iter.next(); 210 if (entry.matches("^total \\d+$")) { // NET-389 211 iter.remove(); 212 } 213 } 214 return original; 215 } 216 217 /** 218 * Parses a line of a unix (standard) FTP server file listing and converts 219 * it into a usable format in the form of an <code> FTPFile </code> 220 * instance. If the file listing line doesn't describe a file, 221 * <code> null </code> is returned, otherwise a <code> FTPFile </code> 222 * instance representing the files in the directory is returned. 223 * 224 * @param entry A line of text from the file listing 225 * @return An FTPFile instance corresponding to the supplied entry 226 */ 227 @Override 228 public FTPFile parseFTPEntry(final String entry) { 229 final FTPFile file = new FTPFile(); 230 file.setRawListing(entry); 231 final int type; 232 boolean isDevice = false; 233 234 if (matches(entry)) 235 { 236 final String typeStr = group(1); 237 final String hardLinkCount = group(15); 238 final String usr = group(16); 239 final String grp = group(17); 240 final String filesize = group(18); 241 final String datestr = group(19) + " " + group(20); 242 String name = group(21); 243 if (trimLeadingSpaces) { 244 name = name.replaceFirst("^\\s+", ""); 245 } 246 247 try 248 { 249 if (group(19).contains(JA_MONTH)) { // special processing for Japanese format 250 final FTPTimestampParserImpl jaParser = new FTPTimestampParserImpl(); 251 jaParser.configure(new FTPClientConfig( 252 FTPClientConfig.SYST_UNIX, DEFAULT_DATE_FORMAT_JA, DEFAULT_RECENT_DATE_FORMAT_JA)); 253 file.setTimestamp(jaParser.parseTimestamp(datestr)); 254 } else { 255 file.setTimestamp(super.parseTimestamp(datestr)); 256 } 257 } 258 catch (final ParseException e) 259 { 260 // intentionally do nothing 261 } 262 263 // A 'whiteout' file is an ARTIFICIAL entry in any of several types of 264 // 'translucent' filesystems, of which a 'union' filesystem is one. 265 266 // bcdelfmpSs- 267 switch (typeStr.charAt(0)) 268 { 269 case 'd': 270 type = FTPFile.DIRECTORY_TYPE; 271 break; 272 case 'e': // NET-39 => z/OS external link 273 type = FTPFile.SYMBOLIC_LINK_TYPE; 274 break; 275 case 'l': 276 type = FTPFile.SYMBOLIC_LINK_TYPE; 277 break; 278 case 'b': 279 case 'c': 280 isDevice = true; 281 type = FTPFile.FILE_TYPE; // TODO change this if DEVICE_TYPE implemented 282 break; 283 case 'f': 284 case '-': 285 type = FTPFile.FILE_TYPE; 286 break; 287 default: // e.g. ? and w = whiteout 288 type = FTPFile.UNKNOWN_TYPE; 289 } 290 291 file.setType(type); 292 293 int g = 4; 294 for (int access = 0; access < 3; access++, g += 4) 295 { 296 // Use != '-' to avoid having to check for suid and sticky bits 297 file.setPermission(access, FTPFile.READ_PERMISSION, 298 !group(g).equals("-")); 299 file.setPermission(access, FTPFile.WRITE_PERMISSION, 300 !group(g + 1).equals("-")); 301 302 final String execPerm = group(g + 2); 303 if (!execPerm.equals("-") && !Character.isUpperCase(execPerm.charAt(0))) 304 { 305 file.setPermission(access, FTPFile.EXECUTE_PERMISSION, true); 306 } 307 else 308 { 309 file.setPermission(access, FTPFile.EXECUTE_PERMISSION, false); 310 } 311 } 312 313 if (!isDevice) 314 { 315 try 316 { 317 file.setHardLinkCount(Integer.parseInt(hardLinkCount)); 318 } 319 catch (final NumberFormatException e) 320 { 321 // intentionally do nothing 322 } 323 } 324 325 file.setUser(usr); 326 file.setGroup(grp); 327 328 try 329 { 330 file.setSize(Long.parseLong(filesize)); 331 } 332 catch (final NumberFormatException e) 333 { 334 // intentionally do nothing 335 } 336 337 // oddball cases like symbolic links, file names 338 // with spaces in them. 339 if (type == FTPFile.SYMBOLIC_LINK_TYPE) 340 { 341 342 final int end = name.indexOf(" -> "); 343 // Give up if no link indicator is present 344 if (end == -1) 345 { 346 file.setName(name); 347 } 348 else 349 { 350 file.setName(name.substring(0, end)); 351 file.setLink(name.substring(end + 4)); 352 } 353 354 } 355 else 356 { 357 file.setName(name); 358 } 359 return file; 360 } 361 return null; 362 } 363 364 /** 365 * Defines a default configuration to be used when this class is 366 * instantiated without a {@link FTPClientConfig FTPClientConfig} 367 * parameter being specified. 368 * @return the default configuration for this parser. 369 */ 370 @Override 371 protected FTPClientConfig getDefaultConfiguration() { 372 return new FTPClientConfig( 373 FTPClientConfig.SYST_UNIX, 374 DEFAULT_DATE_FORMAT, 375 DEFAULT_RECENT_DATE_FORMAT); 376 } 377 378}