using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace BauzoidNET
.parser
{
/**
 * Cursor-based scanner over a single input string.
 *
 * The tokenizer keeps a read position into the string. The read* methods consume
 * input and report failure through a ScanException (or a false / null result when
 * exceptions are switched off). The check* methods look ahead: checkNumber,
 * checkIdentifier and checkString restore the position completely, while checkToken
 * and checkNoMoreTokens leave leading whitespace skipped.
 */
public class Tokenizer
{
    /** Maximum number of positions that pushPosition can store. */
    public const int POSITION_STACK_SIZE = 16;

    /** Message prefix used when the input does not match what was expected. */
    public const string UNEXPECTED_TOKEN = "Unexpected Token!";

    /** Message used when the input ends before the expected element was read. */
    public const string UNEXPECTED_END_OF_STRING = "Unexpected End of String!";

    // The text being scanned and the index of the next character to read.
    private string mString;
    private int mPosition = -1;

    // Character classes; replaceable through the set* methods.
    private char[] mDelimiters = { ';' };
    private char[] mWhitespaces = { ' ', '\n', '\r', '\t' };
    private char[] mStringDelimiters = { '"', '\'' };

    // Fixed-size stack of saved positions and the index of its next free slot.
    private int[] mPositionStack = new int[POSITION_STACK_SIZE];
    private int mPositionStackPosition = 0;

    /** Create a tokenizer that starts reading at the first character of str. */
    public Tokenizer(string str)
    {
        mString = str;
        mPosition = 0;
    }

    /**
     * Push current reading position onto stack for storage and later restore.
     * When the stack already holds POSITION_STACK_SIZE entries the call is silently ignored.
     */
    public void pushPosition()
    {
        if (mPositionStackPosition >= POSITION_STACK_SIZE)
        {
            return;
        }

        mPositionStack[mPositionStackPosition] = mPosition;
        mPositionStackPosition++;
    }

    /**
     * Restore a reading position from stack.
     * When the stack is empty the call is silently ignored and the position is unchanged.
     */
    public void popPosition()
    {
        if (mPositionStackPosition <= 0)
        {
            return;
        }

        mPositionStackPosition--;
        mPosition = mPositionStack[mPositionStackPosition];
    }

    /**
     * Skip whitespace, require that the input continues with token, and consume it.
     * Returns the token that was passed in.
     * Throws ScanException when the end of the string is reached or the input does not match.
     */
    public string readToken(string token)
    {
        skipWhitespaces();

        if (isEndOfString())
        {
            throw new ScanException(UNEXPECTED_END_OF_STRING, getSurroundings());
        }

        if (!isNextString(token))
        {
            throw new ScanException(UNEXPECTED_TOKEN + " Expected: " + token, getSurroundings());
        }

        mPosition += token.Length;
        return token;
    }

    /**
     * Read until a token or the end of the string is encountered. Does not consume the token itself.
     * Leading whitespace is skipped first; the returned text is not trimmed at its end.
     * Returns an empty string when nothing but whitespace is left.
     */
    public string readUntilToken(string token)
    {
        skipWhitespaces();

        if (isEndOfString())
        {
            return "";
        }

        int startPos = mPosition;
        while (!isNextString(token) && !isEndOfString())
        {
            skipChar();
        }

        // Substring takes (start, length). mPosition is the index of the token (or the
        // string length), so the length is the plain difference: adding one would return
        // the first character of the unconsumed token and overrun at the end of the string.
        return mString.Substring(startPos, mPosition - startPos);
    }

    /**
     * Read until the end of the string. Advances the string marker to the end.
     * The result is passed through Preprocessor.trim with the current whitespace set
     * (presumably stripping those characters from the ends; see Preprocessor.trim).
     */
    public string readUntilEndOfString()
    {
        skipWhitespaces();

        if (isEndOfString())
        {
            return "";
        }

        // Take everything from the current position; a count of mString.Length would
        // overrun the string for any position greater than zero.
        string result = mString.Substring(mPosition);

        // Everything has been consumed, so isEndOfString() must be true afterwards.
        mPosition = mString.Length;

        return Preprocessor.trim(result, mWhitespaces);
    }

    /**
     * Read until the next new line character, or the end of the string. Advances the marker.
     * The marker stops on the new line character itself (it is not consumed). The result is
     * passed through Preprocessor.trim with the current whitespace set.
     */
    public string readUntilNewLine()
    {
        skipWhitespaces();

        if (isEndOfString())
        {
            return "";
        }

        int prevPos = mPosition;
        while (!isNextString("\n") && !isEndOfString())
        {
            skipChar();
        }

        // Length is the plain difference; adding one overran the string whenever the
        // last line had no trailing new line character.
        string result = mString.Substring(prevPos, mPosition - prevPos);
        return Preprocessor.trim(result, mWhitespaces);
    }

    /**
     * Skip whitespace and report whether the input continues with token.
     * The token is not consumed, but the skipped whitespace stays skipped.
     */
    public bool checkToken(string token)
    {
        skipWhitespaces();

        if (isEndOfString())
        {
            return false;
        }

        return isNextString(token);
    }

    /**
     * Read a number and return it.
     * Throws ScanException when no number can be read at the current position.
     */
    public float readNumber()
    {
        float value = 0;
        readNumber(ref value);
        return value;
    }

    /**
     * Read a decimal number: optional leading '-', digits, and at most one '.' that
     * must follow at least one digit. Reading stops at the first character that is a
     * whitespace, a delimiter, or neither a digit nor '.'.
     *
     * outValue receives the parsed value (it is reset to 0 before parsing starts).
     * Returns true when at least one digit was read. On failure a ScanException is thrown
     * when throwExceptionOnError is true, otherwise false is returned; in that case the
     * position is not restored (a leading '-' stays consumed).
     */
    public bool readNumber(ref float outValue, bool throwExceptionOnError = true)
    {
        skipWhitespaces();

        if (isEndOfString())
        {
            if (throwExceptionOnError)
            {
                throw new ScanException(UNEXPECTED_END_OF_STRING, getSurroundings());
            }
            return false;
        }

        outValue = 0;
        float postCommaFactor = 1.0f;   // divisor for the next fractional digit
        bool numberFound = false;       // at least one digit seen
        bool commaFound = false;        // decimal point seen
        float sign = 1;

        // check for minus
        if (getCurrentChar() == '-')
        {
            sign = -1;
            skipChar();
        }

        while (!isEndOfString())
        {
            char current = getCurrentChar();
            bool isDigit = isNumeric(current);

            if (!isDigit && current != '.')
            {
                break;
            }

            // The whitespace / delimiter sets are configurable and may contain
            // digits or '.', so they still have to be honoured here.
            if (isWhitespace(current) || isDelimiter(current))
            {
                break;
            }

            if (isDigit)
            {
                numberFound = true;
                int digit = current - '0';

                if (!commaFound)
                {
                    outValue = outValue * 10 + digit;
                }
                else
                {
                    outValue = outValue + digit / postCommaFactor;
                    postCommaFactor *= 10.0f;
                }
            }
            else
            {
                // Decimal point: needs a digit first and may appear only once.
                if (!numberFound || commaFound)
                {
                    break;
                }

                commaFound = true;
                postCommaFactor = 10.0f;
            }

            skipChar();
        }

        if (!numberFound)
        {
            if (!throwExceptionOnError)
            {
                return false;
            }

            if (isEndOfString())
            {
                throw new ScanException(UNEXPECTED_END_OF_STRING, getSurroundings());
            }

            throw new ScanException(UNEXPECTED_TOKEN + " Expected: Number", getSurroundings());
        }

        outValue *= sign;
        return true;
    }

    /** Report whether a number can be read at the current position. The position is left unchanged. */
    public bool checkNumber()
    {
        int prevPosition = mPosition;

        // Use the non-throwing overload so a mismatch costs no exception.
        float value = 0;
        bool result = readNumber(ref value, false);

        mPosition = prevPosition;
        return result;
    }

    /**
     * Skip whitespace and read a run of letters, digits and underscores.
     * Returns the identifier text. On failure (end of string, or no such character at the
     * position) a ScanException is thrown when throwExceptionOnError is true, otherwise
     * null is returned.
     */
    public string readIdentifier(bool throwExceptionOnError = true)
    {
        // TODO: read alphanumericwithunderscore (starting with alpha or underscore) until delimiter or whitespace
        // (currently an identifier may also start with a digit)

        skipWhitespaces();

        if (isEndOfString())
        {
            if (throwExceptionOnError)
            {
                throw new ScanException(UNEXPECTED_END_OF_STRING, getSurroundings());
            }
            return null;
        }

        int startChar = mPosition;
        while (!isEndOfString() && isAlphaNumericOrUnderscore(getCurrentChar()))
        {
            skipChar();
        }

        int numChars = mPosition - startChar;
        if (numChars == 0)
        {
            if (throwExceptionOnError)
            {
                throw new ScanException(UNEXPECTED_TOKEN + " Expected: Identifier", getSurroundings());
            }
            return null;
        }

        return mString.Substring(startChar, numChars);
    }

    /** Report whether an identifier can be read at the current position. The position is left unchanged. */
    public bool checkIdentifier()
    {
        int prevPosition = mPosition;

        // Non-throwing read: null signals "no identifier here".
        string id = readIdentifier(false);

        mPosition = prevPosition;
        return id != null;
    }

    /**
     * Skip whitespace and read a quoted string. The opening character must be one of the
     * configured string delimiters and the string ends at the next occurrence of that same
     * character; there is no escape handling. Both delimiters are consumed.
     *
     * Returns the text between the delimiters. On failure (end of string, no opening
     * delimiter, or missing closing delimiter) a ScanException is thrown when
     * throwExceptionOnError is true, otherwise null is returned.
     */
    public string readString(bool throwExceptionOnError = true)
    {
        skipWhitespaces();

        if (isEndOfString())
        {
            if (throwExceptionOnError)
            {
                throw new ScanException(UNEXPECTED_END_OF_STRING, getSurroundings());
            }
            return null;
        }

        // Find out which of the configured delimiters opens this string.
        char firstChar = getCurrentChar();
        bool foundStringDelimiter = false;
        char usedStringDelimiter = '\0';
        foreach (char stringDelimiter in mStringDelimiters)
        {
            if (firstChar == stringDelimiter)
            {
                foundStringDelimiter = true;
                usedStringDelimiter = stringDelimiter;
            }
        }

        if (!foundStringDelimiter)
        {
            if (throwExceptionOnError)
            {
                throw new ScanException(UNEXPECTED_TOKEN + " Expected: String", getSurroundings());
            }
            return null;
        }

        // skip first string delimiter
        skipChar();

        int startChar = mPosition;
        while (true)
        {
            // The end-of-string test has to come before getCurrentChar(): an unterminated
            // string would otherwise index past the end instead of reporting a scan error.
            if (isEndOfString())
            {
                if (throwExceptionOnError)
                {
                    throw new ScanException(UNEXPECTED_END_OF_STRING, getSurroundings());
                }
                return null;
            }

            if (getCurrentChar() == usedStringDelimiter)
            {
                break;
            }

            skipChar();
        }

        string contents = mString.Substring(startChar, mPosition - startChar);

        // skip last string delimiter
        skipChar();

        return contents;
    }

    /** Report whether a quoted string can be read at the current position. The position is left unchanged. */
    public bool checkString()
    {
        int prevPosition = mPosition;

        // Non-throwing read, like checkNumber / checkIdentifier: a look-ahead must
        // answer false rather than raise a ScanException.
        string str = readString(false);

        mPosition = prevPosition;
        return str != null;
    }

    /** True when c is one of the configured whitespace characters. */
    private bool isWhitespace(char c)
    {
        return Array.IndexOf(mWhitespaces, c) >= 0;
    }

    /** Advance the position past any whitespace characters; stops at the end of the string. */
    private void skipWhitespaces()
    {
        while (!isEndOfString() && isWhitespace(getCurrentChar()))
        {
            skipChar();
        }
    }

    /** True when c is one of the configured delimiter characters. */
    private bool isDelimiter(char c)
    {
        return Array.IndexOf(mDelimiters, c) >= 0;
    }

    /** Advance the position by one character; does nothing at the end of the string. */
    private void skipChar()
    {
        if (isEndOfString())
        {
            return;
        }

        mPosition++;
    }

    /**
     * Report whether the input at the current position continues with str.
     * Delegates to Preprocessor.isNextString; nothing is consumed.
     */
    public bool isNextString(string str)
    {
        return Preprocessor.isNextString(mString, mPosition, str);
    }

    /** True when the position is at or beyond the end of the string. */
    public bool isEndOfString()
    {
        return mPosition >= mString.Length;
    }

    /** Skip whitespace and report whether the end of the string has been reached. */
    public bool checkNoMoreTokens()
    {
        skipWhitespaces();
        return isEndOfString();
    }

    /** True for the ASCII digits '0' to '9'. */
    public static bool isNumeric(char c)
    {
        return (c >= '0') && (c <= '9');
    }

    /** True for the ASCII letters 'A' to 'Z'. */
    public static bool isUpperCaseAlpha(char c)
    {
        return (c >= 'A') && (c <= 'Z');
    }

    /** True for the ASCII letters 'a' to 'z'. */
    public static bool isLowerCaseAlpha(char c)
    {
        return (c >= 'a') && (c <= 'z');
    }

    /** True for ASCII letters of either case. */
    public static bool isAlpha(char c)
    {
        return isUpperCaseAlpha(c) || isLowerCaseAlpha(c);
    }

    /** True for ASCII letters and digits. */
    public static bool isAlphaNumeric(char c)
    {
        return isAlpha(c) || isNumeric(c);
    }

    /** True for ASCII letters, digits and the underscore. */
    public static bool isAlphaNumericOrUnderscore(char c)
    {
        return (c == '_') || isAlphaNumeric(c);
    }

    /**
     * Return the character at the current position without consuming it.
     * There is no bounds check: callers must make sure isEndOfString() is false.
     */
    public char getCurrentChar()
    {
        return mString[mPosition];
    }

    /** Replace the set of characters treated as whitespace. */
    public void setWhitespaces(char[] whitespaces)
    {
        mWhitespaces = whitespaces;
    }

    /** Replace the set of characters treated as delimiters (they end a number in readNumber). */
    public void setDelimiters(char[] delimiters)
    {
        mDelimiters = delimiters;
    }

    /** Replace the set of characters that may open and close a string in readString. */
    public void setStringDelimiter(char[] stringDelimiters)
    {
        mStringDelimiters = stringDelimiters;
    }

    /** Move the reading position to the given index. The value is not validated. */
    public void setPosition(int position)
    {
        mPosition = position;
    }

    /** Return the current reading position. */
    public int getPosition()
    {
        return mPosition;
    }

    /**
     * For debugging purposes. Returns the 1-based line number of the current position
     * followed by up to ten characters of context on either side of it, wrapped in
     * triple angle brackets.
     */
    public string getSurroundings()
    {
        // This runs while an error is being reported, so it must not fail itself:
        // clamp a position that setPosition may have moved outside the string.
        int position = Math.Max(0, Math.Min(mPosition, mString.Length));

        // retrieve the line number
        int numLines = 1;
        for (int i = 0; i < position; i++)
        {
            if (mString[i] == '\n')
            {
                numLines++;
            }
        }

        int startIndex = Math.Max(position - 10, 0);
        int endIndex = Math.Min(position + 10, mString.Length - 1);

        return "Line " + numLines + " <<<" + mString.Substring(startIndex, endIndex - startIndex + 1) + ">>>";
    }

    /** Skip until a token or the end of the string is encountered. Does not consume token, so the next token is the one provided. */
    public void skipUntilToken(string token)
    {
        skipWhitespaces();

        if (isEndOfString())
        {
            return;
        }

        while (!isNextString(token) && !isEndOfString())
        {
            skipChar();
        }
    }

    /**
     * Skip until a token or the end of the string is encountered, but consumes the token.
     * Returns silently when only whitespace is left. When the token is not found before the
     * end of the string, the final readToken call throws a ScanException.
     */
    public void skipUntilAfterToken(string token)
    {
        skipWhitespaces();

        if (isEndOfString())
        {
            return;
        }

        while (!isNextString(token) && !isEndOfString())
        {
            skipChar();
        }

        readToken(token);
    }
}
}