/* ExcelCSVLexer.java is a generated file. You probably want to
* edit ExcelCSVLexer.lex to make changes. Use JFlex to generate it.
* JFlex may be obtained from
* the JFlex website.
* This file was tested to work with jflex 1.4 (and may not
* work with more recent versions because it needs a skeleton file)
* Run:
* jflex --skel csv.jflex.skel ExcelCSVLexer.lex
* You will then have a file called ExcelCSVLexer.java
*/
/*
* Read files in comma separated value format.
* Copyright (C) 2001-2004 Stephen Ostermiller
* http://ostermiller.org/contact.pl?regarding=Java+Utilities
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* See COPYING.TXT for details.
*/
package com.Ostermiller.util;
import java.io.*;
/**
* Read files in comma separated value format as outputted by the Microsoft
* Excel Spreadsheet program.
* More information about this class is available from ostermiller.org.
*
* Excel CSV is a file format used as a portable representation of a database.
* Each line is one entry or record and the fields in a record are separated by commas.
*
* If a field includes a comma or a new line, the whole field must be surrounded with double quotes.
* When the field is in quotes, any quote literals must be escaped by two quotes ("").
* Text that comes after quotes that have been closed but comes before the next comma will be ignored.
*
* Empty fields are returned as a String of length zero: "". The following line has three empty
* fields and three non-empty fields in it. There is an empty field on each end, and one in the
* middle. One token is returned as a space.
*
* <pre>,second,, ,fifth,</pre>
*
* Blank lines are always ignored. Other lines will be ignored if they start with a
* comment character as set by the setCommentStart() method.
*
* An example of how ExcelCSVLexer might be used:
* <pre>
* ExcelCSVLexer shredder = new ExcelCSVLexer(System.in);
* String t;
* while ((t = shredder.getNextToken()) != null) {
*     System.out.println("" + shredder.getLineNumber() + " " + t);
* }
* </pre>
*
* The CSV that Excel outputs differs from the generally accepted CSV standard in several respects:
*
* The initial character is a comma, the delimiter cannot be changed * to a quote or other character that has special meaning in CSV. * * @param newDelim delimiter to which to switch. * @throws BadDelimiterException if the character cannot be used as a delimiter. * * @since ostermillerutils 1.00.00 */ public void changeDelimiter(char newDelim) throws BadDelimiterException { if (newDelim == delimiter) return; // no need to do anything. if (!charIsSafe(newDelim)){ throw new BadDelimiterException(newDelim + " is not a safe delimiter."); } updateCharacterClasses(delimiter, newDelim); // keep a record of the current delimiter. delimiter = newDelim; } /** * Change this Lexer so that it uses a new character for quoting. *
* The initial character is a double quote ("), the quote cannot be changed
* to a comma or other character that has special meaning in CSV.
*
* @param newQuote character to use for quoting.
* @throws BadQuoteException if the character cannot be used as a quote.
*
* @since ostermillerutils 1.00.00
*/
public void changeQuote(char newQuote) throws BadQuoteException {
    // Work is only required when the requested quote differs from the current one.
    if (newQuote != quote) {
        boolean usable = charIsSafe(newQuote);
        if (!usable) {
            throw new BadQuoteException(newQuote + " is not a safe quote.");
        }
        // Update the character classes, then record the quote now in use.
        updateCharacterClasses(quote, newQuote);
        quote = newQuote;
    }
}
private String unescape(String s){
if (s.indexOf('\"', 1) == s.length()-1){
return s.substring(1, s.length()-1);
}
StringBuffer sb = new StringBuffer(s.length());
for (int i=1; i
* New line breaks that occur in the middle of a token are not
* counted in the line number count.
*
* If no tokens have been returned, the line number is undefined.
*
* @return line number of the last token.
*
* @since ostermillerutils 1.00.00
*/
public int getLineNumber() {
    // Report the current value of the lines counter field.
    final int lastTokenLine = this.lines;
    return lastTokenLine;
}
%}
/* Have the generated scanner operate on Unicode characters. */
%unicode
/* Lexical states declared for the rules section (the rules are not visible here). */
%state BEFORE
%state AFTER
%state COMMENT
/* Line terminators: a lone carriage return, a lone line feed, or a CR LF pair. */
CR=([\r])
LF=([\n])
EOL=({CR}|{LF}|{CR}{LF})
/* To change the default delimiter, change the comma in the four definitions
 * that follow: Separator, NotCommaEOLQuote, NotCommaEOL and IgnoreAfter. */
/* The field separator: a single comma. */
Separator=([\,])
/* Any single character other than CR, LF, comma or double quote. */
NotCommaEOLQuote=([^\r\n\,\"])
/* Any single character other than comma, CR or LF (a double quote is allowed). */
NotCommaEOL=([^\,\r\n])
/* Zero or more characters up to, but not including, the next comma or line end.
 * NOTE(review): presumably the text skipped after a closing quote -- confirm in the rules. */
IgnoreAfter=(([^\r\n\,])*)
/* An opening double quote followed by any run of non-quote characters or
 * doubled quotes (""), with no closing quote. */
FalseLiteral=([\"]([^\"]|[\"][\"])*)
/* A complete quoted field: a FalseLiteral followed by its closing double quote. */
StringLiteral=({FalseLiteral}[\"])
/* An unquoted field: a first character that is not a comma, line end or quote,
 * optionally followed by any characters other than comma or line end. */
Value=({NotCommaEOLQuote}(({NotCommaEOL}*))?)
%%
# Comment
* ; Another Comment
* ! Yet another comment
* By default there are no comments in Excel CSV files. Commas and quotes may not be
* used to indicate comment lines.
*
* @param commentDelims list of characters a comment line may start with.
*
* @since ostermillerutils 1.00.00
*/
public void setCommentStart(String commentDelims){
// Store the supplied comment-start characters in the commentDelims field.
this.commentDelims = commentDelims;
}
// NOTE(review): addLine is not read anywhere in the visible code; presumably it is
// the amount the rules add to the line count -- confirm against the rules section.
private int addLine = 1;
// Value returned by getLineNumber(); starts at 0.
private int lines = 0;
/**
* Get the line number that the last token came from.
*