The Annoyance Filter.pdf - Fourmilab
The Annoyance Filter.pdf - Fourmilab
The Annoyance Filter.pdf - Fourmilab
You also want an ePaper? Increase the reach of your titles
YUMPU automatically turns print PDFs into web optimized ePapers that Google loves.
98 FLASH TEXT EXTRACTOR ANNOYANCE-FILTER §120
120. Most of the text we’re really interested in will be found in the DefineText tag and its younger
sibling DefineText2. After spitting out the various wobbly green parts, we digest the list of glyphs
composing the text, going back to the font definition to claw them back into civilised language which
we can filter.
〈 Parse Flash DefineText tags 120 〉 ≡<br />
{<br />
#ifdef FLASH_PARSE_DEBUG<br />
unsigned short textID = get16 ( );<br />
cout ≪ "DefineText.␣␣ID␣=␣" ≪ textID ≪ endl ;<br />
#else<br />
get16 ( ); /∗ Ignore textID ∗/<br />
#endif<br />
rect tr ;<br />
getRect (&tr );<br />
matrix tm ;<br />
getMatrix (&tm );<br />
unsigned short textGlyphBits = get8 ( );<br />
unsigned short textAdvanceBits = get8 ( );<br />
int fontId = −1;<br />
map〈unsigned short, vector〈unsigned short〉 ∗〉::iterator fontp = fontMap.end ( );<br />
map〈unsigned short, unsigned short〉::iterator fgcp = fontGlyphCount .end ( );<br />
unsigned int fGlyphs = 0;<br />
fontFlags fFlags = static cast〈fontFlags〉(0);<br />
vector〈unsigned short〉 ∗fontChars = Λ; /∗ Now it’s a matter of parsing the text records ∗/<br />
while (true ) {<br />
unsigned int textRecordType = get8 ( );<br />
if (textRecordType ≡ 0) {<br />
break; /∗ 0 indicates end of text records ∗/<br />
}<br />
if (textRecordType & isTextControl ) {<br />
#ifdef FLASH_PARSE_DEBUG<br />
cout ≪ "Text␣control␣record." ≪ endl ;<br />
#endif<br />
if (textRecordType & textHasFont ) {<br />
fontId = get16 ( );<br />
#ifdef FLASH_PARSE_DEBUG<br />
cout ≪ "␣␣␣␣fontId:␣" ≪ fontId ≪ endl ;<br />
#endif<br />
fgcp = fontGlyphCount .find (fontId );<br />
if (fgcp ≡ fontGlyphCount .end ( )) {<br />
fontp = fontMap.end ( );<br />
if (verbose ) {<br />
cerr ≪ "Flash␣DefineText␣item␣references␣undefined␣font␣ID␣" ≪ fontId ≪<br />
endl ;<br />
}<br />
}<br />
else {<br />
fGlyphs = fgcp ⃗ second ;<br />
fontChars = fontMap.find (fontId ) ⃗ second ;