12.06.2015 Views

The Annoyance Filter.pdf - Fourmilab

The Annoyance Filter.pdf - Fourmilab

The Annoyance Filter.pdf - Fourmilab

SHOW MORE
SHOW LESS

Create successful ePaper yourself

Turn your PDF publications into a flip-book with our unique Google optimized e-Paper software.

74 UTF-16 UNICODE DECODER ANNOYANCE-FILTER §86<br />

86. Decode the next logical character. We return −1 when the end of the encoded line is encountered, and also after reporting a diagnostic for a truncated or invalid surrogate pair.<br />

〈 Class implementations 11 〉 +≡<br />

int UTF 16 Unicode MBCSdecoder ::getNextDecodedChar (void)<br />

{<br />

string ::size type nwydes = 0;<br />

int w1 , w2 , result ;<br />

w1 = getNextUTF 16Word ( );<br />

if (w1 < 0) {<br />

return w1 ;<br />

}<br />

if ((w1 ≤ # D800) ∨ (w1 > # DFFF)) {<br />

result = w1 ;<br />

nwydes = 1;<br />

}<br />

else if ((w1 ≥ # D800) ∧ (w1 ≤ # DBFF)) {<br />

w2 = getNextUTF 16Word ( );<br />

if (w2 < 0) {<br />

ostringstream os ;<br />

os ≪ name ( ) ≪ "_MBCSdecoder:␣Premature␣end␣of␣line␣in␣UTF−16␣two␣word␣char\<br />

acter.";<br />

reportDecoderDiagnostic(os );<br />

return −1;<br />

}<br />

nwydes = 2;<br />

if ((w2 < # DC00) ∨ (w2 > # DFFF)) {<br />

ostringstream os ;<br />

os ≪ name ( ) ≪ "_MBCSdecoder:␣Invalid␣second␣word␣surrogate␣" ≪ "0x" ≪<br />

setiosflags (ios ::uppercase ) ≪ hex ≪ w2 ≪ "␣in␣UTF−16␣encoded␣string.";<br />

reportDecoderDiagnostic(os );<br />

return −1;<br />

}<br />

result = (((w1 & # 3FF) ≪ 10) | (w2 & # 3FF)) + # 10000;<br />

}<br />

else {<br />

ostringstream os ;<br />

os ≪ name ( ) ≪ "_MBCSdecoder:␣Invalid␣first␣word␣surrogate␣" ≪ "0x" ≪<br />

setiosflags (ios ::uppercase ) ≪ hex ≪ w1 ≪ "␣in␣UTF−16␣encoded␣string.";<br />

reportDecoderDiagnostic(os );<br />

return −1;<br />

}<br />

return result ;<br />

}

Hooray! Your file is uploaded and ready to be published.

Saved successfully!

Ooh no, something went wrong!