Project Dana

Type definition file parsing.Tokeniser by barry
uses data.String

/*
{"description" : "Data type representing a single parsed entity (a syntax token, literal, comment, or other particle), together with its location in the original unparsed text."}
*/
data ParseToken {
	/* {"@description" : "Possible value for 'type', indicating that this is something other than a syntax token, literal, or comment."} */
	const int TYPE_PARTICLE			= 0
	/* {"@description" : "Possible value for 'type', indicating that this is a string literal."} */
	const int TYPE_LITERAL_STRING 	= 1
	/* {"@description" : "Possible value for 'type', indicating that this is a literal number."} */
	const int TYPE_LITERAL_NUMBER 	= 2
	/* {"@description" : "Possible value for 'type', indicating that this is a line comment."} */
	const int TYPE_LINE_COMMENT 	= 3
	/* {"@description" : "Possible value for 'type', indicating that this is a block comment."} */
	const int TYPE_BLOCK_COMMENT 	= 4
	/* {"@description" : "Possible value for 'type', indicating that this is a syntax token."} */
	const int TYPE_TOKEN 			= 5
	
	/* {"@description" : "The type of this parsed entity, such as token or string literal; one of the TYPE_ constants of this data type."} */
	int type
	/* {"@description" : "The contents of this parsed entity (e.g., the syntax token, or the comment)."} */
	char content[]
	
	/* {"@description" : "The position at which this parsed entity starts, as an index into the original unparsed text."} */
	int sourceStart
	/* {"@description" : "The length of the original unparsed text from which this parsed entity was derived."} */
	int sourceLength
	}

/* {"description" : "This type describes a single issue encountered while parsing."} */
data TokeniseIssue {
	/* {"@description" : "Parsing failed due to an unrecognisable number format."} */
	const int I_UNKNOWN_NUMBER_FORMAT = 1
	/* {"@description" : "Parsing failed due to an invalid Unicode character."} */
	const int I_INVALID_UNICODE = 2
	/* {"@description" : "Parsing failed due to an invalid escape character in a string."} */
	const int I_INVALID_ESCAPE_CHAR = 3
	/* {"@description" : "Parsing failed due to an unterminated string literal."} */
	const int I_UNTERMINATED_STRING = 4
	/* {"@description" : "The type of the issue."} */
	int type
	/* {"@description" : "A human-readable text description of the issue."} */
	char description[]
}

/* {"description" : "This is a container type that holds the parsed token list and/or details of any parsing issues."} */
data TokeniseResult {
	/* {"@description" : "The list of parsed tokens; this may be null if terminal parsing issues were encountered."} */
	ParseToken tokens[]
	/* {"@description" : "The issues (if any) encountered during a parse."} */
	TokeniseIssue issues[]
}

/* {"description" : "This interface is used to tokenise a text document, using a given set of tokens, and supporting both line and block comments."} */
interface Tokeniser {
	/*
	{"@description" : "Initialise a tokeniser instance with the given set of tokens.",
		"tokens" : "The tokens that will be used by the tokeniser to separate text."}
	*/
	Tokeniser(store String tokens[])
	
	/*
	{"@description" : "Set the string that indicates the remainder of a line is a comment.",
		"lineComment" : "A string that indicates the remainder of a line is a comment."}
	*/
	void setLineComment(char lineComment[])
	
	/*
	{"@description" : "Set the starting and ending strings that indicate the enclosed text is a comment.",
		"start" : "A string that indicates the start of a block comment.",
		"end" : "A string that indicates the end of a block comment."}
	*/
	void setBlockComment(char start[], char end[])
	
	/*
	{"@description" : "Run the tokeniser over the given text, returning the parsed tokens and/or details of any parsing issues.",
		"content" : "The text to split into tokens.",
		"@return" : "A TokeniseResult holding the list of parsed tokens (which may be null if terminal parsing issues were encountered) and the issues (if any) encountered during the parse."}
	*/
	TokeniseResult tokenise(char content[])
	}
Revision history
To propose a new revision to this entity, use dana source put -ut your/new/version.dn -n parsing.Tokeniser -m "reason for update" -u yourUsername
Version 1 (this version) by barry
Notes for this version: Standard Library Initialisation