The Annoyance Filter.pdf - Fourmilab
The Annoyance Filter.pdf - Fourmilab
The Annoyance Filter.pdf - Fourmilab
Create successful ePaper yourself
Turn your PDF publications into a flip-book with our unique Google optimized e-Paper software.
§35 ANNOYANCE-FILTER FAST DICTIONARY 39<br />
35. <strong>The</strong> exportDictionary method writes a dictionary to a file in fastDictionary format. We provide<br />
implementations which accept either an ostream or the name of a file to which the fastDictionary<br />
is written. If you pass an ostream, make sure it’s opened in binary mode on platforms where that<br />
matters.<br />
〈 Class implementations 11 〉 +≡<br />
void fastDictionary ::exportDictionary (const dictionary &d, ostream &o)<br />
{<br />
u int32 t hashSize = nextGreaterPrime (d.size ( ));<br />
vector〈u int32 t 〉 hashTable (hashSize , fastDictionaryVoidLink );<br />
vector〈unsigned char〉 words ;<br />
for (dictionary ::const iterator w = d.begin ( ); w ≠ d.end ( ); w++) {<br />
u int32 t h = computeHashValue (w ⃗ first );<br />
unsigned int slot = h % hashSize ;<br />
〈 Link new word to hash table chain 36 〉;<br />
〈 Add new word to word table 37 〉;<br />
}<br />
o ≪ fastDictionarySignature ;<br />
u int16 t b;<br />
b = byteOrderMark ;<br />
o.write (reinterpret cast〈const char ∗〉(&b), sizeof b); /∗ Byte order mark ∗/<br />
b = versionNumber ;<br />
o.write (reinterpret cast〈const char ∗〉(&b), sizeof b); /∗ File version number ∗/<br />
b = doubleSize ;<br />
o.write (reinterpret cast〈const char ∗〉(&b), sizeof b); /∗ Size of double in bytes ∗/<br />
b = 0;<br />
o.write (reinterpret cast〈const char ∗〉(&b), sizeof b); /∗ 88 Filler size is 2 bytes ∗/<br />
double td = fastDictionaryFloatingTest ;<br />
o.write (reinterpret cast〈const char ∗〉(&td ), sizeof td );<br />
/∗ double compatibility test: $\frac{1}{111}$ ∗/<br />
u int32 t headerSize = 4 + (4 ∗ sizeof (u int16 t )) + sizeof (double) + (4 ∗ sizeof (u int32 t ));<br />
u int32 t wordTableSize = words .size ( );<br />
u int32 t totalSize = headerSize + (hashTable .size ( ) ∗ sizeof (u int32 t )) + wordTableSize ;<br />
o.write (reinterpret cast〈const char ∗〉(&totalSize ), sizeof totalSize );<br />
/∗ Total size of file ∗/<br />
o.write (reinterpret cast〈const char ∗〉(&headerSize ), sizeof headerSize );<br />
/∗ Hash table offset ∗/<br />
o.write (reinterpret cast〈const char ∗〉(&hashSize ), sizeof hashSize );<br />
/∗ Number of buckets in hash table ∗/<br />
o.write (reinterpret cast〈const char ∗〉(&wordTableSize ), sizeof wordTableSize );<br />
/∗ Word table size in bytes ∗/<br />
#ifdef OLDWAY<br />
o.write (hashTable .begin ( ), hashTable .size ( ) ∗ sizeof (u int32 t )); /∗ Hash table ∗/<br />
o.write (words .begin ( ), words .size ( )); /∗ Word table ∗/<br />
#else<br />
for (vector〈u int32 t 〉::const iterator htp = hashTable .begin ( ); htp ≠ hashTable .end ( ); htp ++) {<br />
u int32 t hte = ∗htp;<br />
o.write (reinterpret cast〈const char ∗〉(&hte ), sizeof hte );<br />
}<br />
for (vector〈unsigned char〉::const iterator wtp = words .begin ( ); wtp ≠ words .end ( ); wtp ++) {<br />
o.put (∗wtp);