The Annoyance Filter.pdf - Fourmilab
The Annoyance Filter.pdf - Fourmilab
The Annoyance Filter.pdf - Fourmilab
Create successful ePaper yourself
Turn your PDF publications into a flip-book with our unique Google optimized e-Paper software.
142 TOKEN DEFINITION ANNOYANCE-FILTER §170<br />
170. Token definition.<br />
A tokenDefinition object provides the means by which the tokenParser (below) distinguishes tokens<br />
in a stream of text. Tokens are defined by three arrays, each indexed by ISO character codes between 0<br />
and 255. The first, isToken, is true for characters which comprise tokens. The second, notExclusively,<br />
is true for characters which may appear in tokens, but only in the company of other characters. The<br />
third, notAtEnd, is true for characters which may appear within a token, but not at the start or the end<br />
of one.<br />
〈 Class definitions 10 〉 +≡<br />
class tokenDefinition {<br />
protected:<br />
static const int numTokenChars = 256;<br />
bool isToken [numTokenChars ], notExclusively [numTokenChars ], notAtEnd [numTokenChars ];<br />
unsigned int minTokenLength , maxTokenLength ;<br />
public:<br />
// Default constructor: puts the new object into its cleared state by calling clear().<br />
tokenDefinition()<br />
{<br />
    this->clear();<br />
}<br />
// Reset the definition: set every entry of the isToken, notExclusively and<br />
// notAtEnd tables to false, then set the token length limits to 1 and 65535.<br />
void clear(void)<br />
{<br />
    int code = 0;<br />
    while (code < numTokenChars) {<br />
        notAtEnd[code] = false;<br />
        notExclusively[code] = false;<br />
        isToken[code] = false;<br />
        ++code;<br />
    }<br />
    setLengthLimits(1, 65535);<br />
}<br />
// Update the stored token length limits.<br />
// lmin: new value for minTokenLength; 0 (the default) leaves it unchanged.<br />
// lmax: new value for maxTokenLength; 0 (the default) leaves it unchanged.<br />
void setLengthLimits(unsigned int lmin = 0, unsigned int lmax = 0)<br />
{<br />
    // The two updates are independent of each other; a zero argument is a no-op.<br />
    if (lmax != 0) {<br />
        maxTokenLength = lmax;<br />
    }<br />
    if (lmin != 0) {<br />
        minTokenLength = lmin;<br />
    }<br />
}<br />
// Return the currently stored lower length limit (minTokenLength).<br />
unsigned int getLengthMin(void) const<br />
{<br />
    return this->minTokenLength;<br />
}<br />
// Return the currently stored upper length limit (maxTokenLength).<br />
unsigned int getLengthMax(void) const<br />
{<br />
    return this->maxTokenLength;<br />
}<br />
// Report whether character code c is flagged in the isToken table.<br />
// c must be at least 0 and less than numTokenChars; the assert enforces this<br />
// before the table is indexed.<br />
bool isTokenMember(const int c) const<br />
{<br />
    assert(c ≥ 0 ∧ c < numTokenChars);<br />
    return this->isToken[c];<br />
}<br />
bool isTokenNotExclusively (const int c) const<br />
{<br />
assert(c ≥ 0 ∧ c < numTokenChars );