package com.gebauz.bauzoid.parser;
/**
 * Cursor-based tokenizer over an in-memory string.
 *
 * <p>The tokenizer keeps a read position into the input and offers
 * {@code readXxx} methods that consume input and {@code checkXxx} methods
 * that only probe: they attempt the matching read and always restore the
 * position afterwards.
 *
 * <p>Whitespace, delimiter and string-delimiter character sets can be
 * replaced through the corresponding setters.
 */
public class Tokenizer
{
    /** Message prefix used when the input does not contain what was expected. */
    public static final String UNEXPECTED_TOKEN = "Unexpected Token!";

    /** Message used when the input ends before the expected content was found. */
    public static final String UNEXPECTED_END_OF_STRING = "Unexpected End of String!";

    /** Number of characters shown on each side of the position by {@link #getSurroundings()}. */
    private static final int SURROUNDINGS_RADIUS = 10;

    /** The text being tokenized. */
    private final String mString;

    /** Index of the next character to read. */
    private int mPosition = -1;

    /** Characters that terminate a number. */
    private char[] mDelimiters = {';'};

    /** Characters skipped before every read. */
    private char[] mWhitespaces = {' ', '\n', '\r', '\t'};

    /** Characters that may open (and must then close) a quoted string. */
    private char[] mStringDelimiters = {'"', '\''};

    /**
     * Creates a tokenizer positioned at the start of {@code str}.
     *
     * @param str the text to tokenize
     */
    public Tokenizer(String str)
    {
        mString = str;
        mPosition = 0;
    }

    /**
     * Skips leading whitespace and consumes {@code token}, which must come next.
     *
     * @param token the exact text expected at the current position
     * @return {@code token}
     * @throws ScanException if the input is exhausted or {@code token} does not come next
     */
    public String readToken(String token) throws ScanException
    {
        skipWhitespaces();

        if (isEndOfString())
        {
            throw new ScanException(UNEXPECTED_END_OF_STRING, getSurroundings());
        }

        if (!isNextString(token))
        {
            throw new ScanException(UNEXPECTED_TOKEN + " Expected: " + token, getSurroundings());
        }

        mPosition += token.length();
        return token;
    }

    /** Read until a token or the end of the string is encountered. Does not consume the token itself. */
    public String readUntilToken(String token)
    {
        skipWhitespaces();

        int startPos = mPosition;
        if (isEndOfString())
        {
            return "";
        }

        while (!isNextString(token) && !isEndOfString())
        {
            skipChar();
        }

        return mString.substring(startPos, mPosition);
    }

    /**
     * Read until the end of the string. Advances the string marker to the end.
     *
     * @return the remaining text (after skipping leading whitespace) passed through
     *         {@code Preprocessor.trim} with the current whitespace set, or an empty
     *         string if nothing remains
     */
    public String readUntilEndOfString()
    {
        skipWhitespaces();

        if (isEndOfString())
        {
            return "";
        }

        String result = mString.substring(mPosition);

        // Move past the last character so that isEndOfString() reports true afterwards.
        mPosition = mString.length();

        return Preprocessor.trim(result, mWhitespaces);
    }

    /**
     * Read until the next new line character, or the end of the string. Advances the marker.
     *
     * <p>Leading whitespace is skipped first; with the default whitespace set this
     * includes new line characters, so blank lines are skipped. The terminating
     * {@code '\n'} itself is not consumed.
     *
     * @return the text read, passed through {@code Preprocessor.trim} with the current
     *         whitespace set, or an empty string if nothing remains
     */
    public String readUntilNewLine()
    {
        skipWhitespaces();

        if (isEndOfString())
        {
            return "";
        }

        int prevPos = mPosition;
        while (!isNextString("\n") && !isEndOfString())
        {
            skipChar();
        }

        String result = mString.substring(prevPos, mPosition);
        return Preprocessor.trim(result, mWhitespaces);
    }

    /**
     * Tests whether {@link #readToken(String)} would succeed, without moving the position.
     *
     * @param token the text to look for
     * @return {@code true} if {@code token} could be read at the current position
     */
    public boolean checkToken(String token)
    {
        int prevPosition = mPosition;
        try
        {
            readToken(token);
        }
        catch (ScanException ex)
        {
            return false;
        }
        finally
        {
            // Probe only: always rewind, whether the read succeeded or not.
            mPosition = prevPosition;
        }
        return true;
    }

    /**
     * Skips leading whitespace and reads a decimal number of the form
     * {@code [-]digits[.digits]}.
     *
     * <p>Reading stops at the first character that is not part of the number.
     * A {@code '.'} is only accepted after at least one digit, and only once.
     *
     * @return the parsed value
     * @throws ScanException if the input is exhausted or no digit was found
     */
    public float readNumber() throws ScanException
    {
        skipWhitespaces();

        if (isEndOfString())
        {
            throw new ScanException(UNEXPECTED_END_OF_STRING, getSurroundings());
        }

        float result = 0;
        float postCommaFactor = 1.0f;
        boolean numberFound = false;
        boolean commaFound = false;
        float sign = 1;

        // check for minus
        if (getCurrentChar() == '-')
        {
            sign = -1;
            skipChar();
        }

        while (!isEndOfString())
        {
            char current = getCurrentChar();

            // The whitespace/delimiter sets are configurable and may contain
            // digits or '.', so they take precedence over number characters.
            if (isWhitespace(current) || isDelimiter(current))
            {
                break;
            }

            if (isNumeric(current))
            {
                int digit = Character.digit(current, 10);
                numberFound = true;
                if (!commaFound)
                {
                    result = result * 10 + digit;
                }
                else
                {
                    result = result + digit / postCommaFactor;
                    postCommaFactor *= 10.0f;
                }
            }
            else if (current == '.')
            {
                // need a digit first, and only one decimal point is allowed
                if (!numberFound || commaFound)
                {
                    break;
                }
                commaFound = true;
                postCommaFactor = 10.0f;
            }
            else
            {
                // not a number
                break;
            }

            skipChar();
        }

        if (!numberFound)
        {
            if (isEndOfString())
            {
                throw new ScanException(UNEXPECTED_END_OF_STRING, getSurroundings());
            }
            throw new ScanException(UNEXPECTED_TOKEN + " Expected: Number", getSurroundings());
        }

        return sign * result;
    }

    /**
     * Tests whether {@link #readNumber()} would succeed, without moving the position.
     *
     * @return {@code true} if a number could be read at the current position
     */
    public boolean checkNumber()
    {
        int prevPosition = mPosition;
        try
        {
            readNumber();
        }
        catch (ScanException ex)
        {
            return false;
        }
        finally
        {
            // Probe only: always rewind, whether the read succeeded or not.
            mPosition = prevPosition;
        }
        return true;
    }

    /**
     * Skips leading whitespace and reads a run of ASCII letters, digits and underscores.
     *
     * <p>NOTE(review): a leading digit is currently accepted; the original TODO
     * suggests identifiers should start with a letter or underscore.
     *
     * @return the identifier text
     * @throws ScanException if the input is exhausted or no identifier character comes next
     */
    public String readIdentifier() throws ScanException
    {
        skipWhitespaces();

        if (isEndOfString())
        {
            throw new ScanException(UNEXPECTED_END_OF_STRING, getSurroundings());
        }

        int startChar = mPosition;
        while (!isEndOfString() && isAlphaNumericOrUnderscore(getCurrentChar()))
        {
            skipChar();
        }

        // The position did not move, so not a single identifier character was found.
        if (mPosition == startChar)
        {
            throw new ScanException(UNEXPECTED_TOKEN + " Expected: Identifier", getSurroundings());
        }

        return mString.substring(startChar, mPosition);
    }

    /**
     * Tests whether {@link #readIdentifier()} would succeed, without moving the position.
     *
     * @return {@code true} if an identifier could be read at the current position
     */
    public boolean checkIdentifier()
    {
        int prevPosition = mPosition;
        try
        {
            readIdentifier();
        }
        catch (ScanException ex)
        {
            return false;
        }
        finally
        {
            // Probe only: always rewind, whether the read succeeded or not.
            mPosition = prevPosition;
        }
        return true;
    }

    /**
     * Skips leading whitespace and reads a quoted string.
     *
     * <p>The string must start with one of the configured string delimiters and
     * ends at the next occurrence of that same delimiter. Both delimiters are
     * consumed; escape sequences are not interpreted.
     *
     * @return the text between the delimiters
     * @throws ScanException if the input is exhausted, does not start with a string
     *         delimiter, or ends before the closing delimiter
     */
    public String readString() throws ScanException
    {
        skipWhitespaces();

        if (isEndOfString())
        {
            throw new ScanException(UNEXPECTED_END_OF_STRING, getSurroundings());
        }

        char opening = getCurrentChar();
        boolean foundStringDelimiter = false;
        for (char stringDelimiter : mStringDelimiters)
        {
            if (opening == stringDelimiter)
            {
                foundStringDelimiter = true;
                break;
            }
        }

        if (!foundStringDelimiter)
        {
            throw new ScanException(UNEXPECTED_TOKEN + " Expected: String", getSurroundings());
        }

        // skip first string delimiter
        skipChar();

        int startChar = mPosition;
        while (true)
        {
            // Check for end of input BEFORE touching the current character, so an
            // unterminated string is reported as a ScanException instead of an
            // index error.
            if (isEndOfString())
            {
                throw new ScanException(UNEXPECTED_END_OF_STRING, getSurroundings());
            }
            if (getCurrentChar() == opening)
            {
                break;
            }
            skipChar();
        }

        String result = mString.substring(startChar, mPosition);

        // skip last string delimiter
        skipChar();

        return result;
    }

    /**
     * Tests whether {@link #readString()} would succeed, without moving the position.
     *
     * @return {@code true} if a quoted string could be read at the current position
     */
    public boolean checkString()
    {
        int prevPosition = mPosition;
        try
        {
            readString();
        }
        catch (ScanException ex)
        {
            return false;
        }
        finally
        {
            // Probe only: always rewind, whether the read succeeded or not.
            mPosition = prevPosition;
        }
        return true;
    }

    /** Returns whether {@code c} is in the current whitespace set. */
    private boolean isWhitespace(char c)
    {
        for (char whitespace : mWhitespaces)
        {
            if (c == whitespace)
            {
                return true;
            }
        }
        return false;
    }

    /** Advances the position past any run of whitespace characters. */
    private void skipWhitespaces()
    {
        while (!isEndOfString() && isWhitespace(getCurrentChar()))
        {
            skipChar();
        }
    }

    /** Returns whether {@code c} is in the current delimiter set. */
    private boolean isDelimiter(char c)
    {
        for (char delimiter : mDelimiters)
        {
            if (c == delimiter)
            {
                return true;
            }
        }
        return false;
    }

    /** Advances the position by one character; does nothing at the end of the input. */
    private void skipChar()
    {
        if (!isEndOfString())
        {
            mPosition++;
        }
    }

    /**
     * Delegates to {@code Preprocessor.isNextString} with the input, the current
     * position and {@code str}; presumably this reports whether {@code str}
     * occurs at the current position (confirm against Preprocessor).
     *
     * @param str the text to look for
     * @return the result of {@code Preprocessor.isNextString}
     */
    public boolean isNextString(String str)
    {
        return Preprocessor.isNextString(mString, mPosition, str);
    }

    /** Returns whether the position is at or beyond the end of the input. */
    public final boolean isEndOfString()
    {
        return mPosition >= mString.length();
    }

    /**
     * Skips whitespace and reports whether the input is exhausted.
     * Note that the skipped whitespace stays consumed.
     *
     * @return {@code true} if only whitespace remained
     */
    public final boolean checkNoMoreTokens()
    {
        skipWhitespaces();
        return isEndOfString();
    }

    /** Returns whether {@code c} is an ASCII digit {@code '0'..'9'}. */
    public static boolean isNumeric(char c)
    {
        return c >= '0' && c <= '9';
    }

    /** Returns whether {@code c} is an ASCII upper-case letter {@code 'A'..'Z'}. */
    public static boolean isUpperCaseAlpha(char c)
    {
        return c >= 'A' && c <= 'Z';
    }

    /** Returns whether {@code c} is an ASCII lower-case letter {@code 'a'..'z'}. */
    public static boolean isLowerCaseAlpha(char c)
    {
        return c >= 'a' && c <= 'z';
    }

    /** Returns whether {@code c} is an ASCII letter. */
    public static boolean isAlpha(char c)
    {
        return isUpperCaseAlpha(c) || isLowerCaseAlpha(c);
    }

    /** Returns whether {@code c} is an ASCII letter or digit. */
    public static boolean isAlphaNumeric(char c)
    {
        return isAlpha(c) || isNumeric(c);
    }

    /** Returns whether {@code c} is an ASCII letter, a digit or {@code '_'}. */
    public static boolean isAlphaNumericOrUnderscore(char c)
    {
        return c == '_' || isAlphaNumeric(c);
    }

    /**
     * Returns the character at the current position.
     *
     * @throws StringIndexOutOfBoundsException if the position is outside the input;
     *         check {@link #isEndOfString()} first
     */
    public final char getCurrentChar()
    {
        return mString.charAt(mPosition);
    }

    /** Replaces the whitespace set. The array is stored as-is, not copied. */
    public final void setWhitespaces(char[] whitespaces)
    {
        mWhitespaces = whitespaces;
    }

    /** Replaces the delimiter set. The array is stored as-is, not copied. */
    public final void setDelimiters(char[] delimiters)
    {
        mDelimiters = delimiters;
    }

    /** Replaces the string-delimiter set. The array is stored as-is, not copied. */
    public final void setStringDelimiter(char[] stringDelimiters)
    {
        mStringDelimiters = stringDelimiters;
    }

    /** Sets the read position. The value is not validated. */
    public final void setPosition(int position)
    {
        mPosition = position;
    }

    /** Returns the current read position. */
    public final int getPosition()
    {
        return mPosition;
    }

    /**
     * For debugging purposes.
     *
     * @return a description of the form {@code Line N <<<text>>>}, where {@code N} is the
     *         1-based line of the current position and {@code text} is up to
     *         10 characters on either side of it
     */
    public final String getSurroundings()
    {
        final int length = mString.length();

        // Clamp so that error reporting never fails itself, even for an empty
        // input or a position set out of range through setPosition().
        final int position = Math.max(0, Math.min(mPosition, length));

        // retrieve the line number
        int numLines = 1;
        for (int i = 0; i < position; i++)
        {
            if (mString.charAt(i) == '\n')
            {
                numLines++;
            }
        }

        int startIndex = Math.max(position - SURROUNDINGS_RADIUS, 0);
        // substring()'s end index is exclusive, so clamp to length (not length - 1).
        int endIndex = Math.min(position + SURROUNDINGS_RADIUS, length);

        return "Line " + numLines + " <<<" + mString.substring(startIndex, endIndex) + ">>>";
    }
}