// Test data for the string-type demonstration in TForm3.Button2Click.
const
// Untyped string constant listing the character codes #$00..#$ff in
// ascending order, 16 codes per source line (256 codes in total).
AllByteValues=
#$00#$01#$02#$03#$04#$05#$06#$07#$08#$09#$0a#$0b#$0c#$0d#$0e#$0f+
#$10#$11#$12#$13#$14#$15#$16#$17#$18#$19#$1a#$1b#$1c#$1d#$1e#$1f+
#$20#$21#$22#$23#$24#$25#$26#$27#$28#$29#$2a#$2b#$2c#$2d#$2e#$2f+
#$30#$31#$32#$33#$34#$35#$36#$37#$38#$39#$3a#$3b#$3c#$3d#$3e#$3f+
#$40#$41#$42#$43#$44#$45#$46#$47#$48#$49#$4a#$4b#$4c#$4d#$4e#$4f+
#$50#$51#$52#$53#$54#$55#$56#$57#$58#$59#$5a#$5b#$5c#$5d#$5e#$5f+
#$60#$61#$62#$63#$64#$65#$66#$67#$68#$69#$6a#$6b#$6c#$6d#$6e#$6f+
#$70#$71#$72#$73#$74#$75#$76#$77#$78#$79#$7a#$7b#$7c#$7d#$7e#$7f+
#$80#$81#$82#$83#$84#$85#$86#$87#$88#$89#$8a#$8b#$8c#$8d#$8e#$8f+
#$90#$91#$92#$93#$94#$95#$96#$97#$98#$99#$9a#$9b#$9c#$9d#$9e#$9f+
#$a0#$a1#$a2#$a3#$a4#$a5#$a6#$a7#$a8#$a9#$aa#$ab#$ac#$ad#$ae#$af+
#$b0#$b1#$b2#$b3#$b4#$b5#$b6#$b7#$b8#$b9#$ba#$bb#$bc#$bd#$be#$bf+
#$c0#$c1#$c2#$c3#$c4#$c5#$c6#$c7#$c8#$c9#$ca#$cb#$cc#$cd#$ce#$cf+
#$d0#$d1#$d2#$d3#$d4#$d5#$d6#$d7#$d8#$d9#$da#$db#$dc#$dd#$de#$df+
#$e0#$e1#$e2#$e3#$e4#$e5#$e6#$e7#$e8#$e9#$ea#$eb#$ec#$ed#$ee#$ef+
#$f0#$f1#$f2#$f3#$f4#$f5#$f6#$f7#$f8#$f9#$fa#$fb#$fc#$fd#$fe#$ff;
// The same literal cast to RawByteString.
RawByteTest=
RawByteString(AllByteValues);
// The same literal cast to GreekString. GreekString is declared outside this
// excerpt - presumably an AnsiString bound to a Greek code page; TODO confirm.
GreekTest=
GreekString(AllByteValues);
// The same literal cast to AnsiString.
AnsiTest=
ansistring(AllByteValues);
// Click handler that demonstrates, through a sequence of Assert calls, how the
// constants AllByteValues, RawByteTest, GreekTest and AnsiTest behave when they
// are indexed, compared and assigned to a string variable.
//
// Sender: not used by this handler.
//
// The expected value lists below are the ones recorded by the author; the
// AnsiString section is marked as tested on a Windows-1252 machine, so the
// other lists presumably depend on the system ANSI code page as well.
procedure TForm3.Button2Click(Sender: TObject);
var
  i:0..255;
  ErrorList:string;
  c:char;
  ac:ansichar;
  utf16:string;
begin
  // --- The plain (Unicode) string constant ---
  Assert (length(AllByteValues)=256,'The number of characters is just like in Delphi 2006');
  // Compare against SizeOf(Pointer) rather than a hard-coded 4 so that the
  // "this is a pointer" check also holds on targets with 8-byte pointers.
  Assert (sizeof(AllByteValues)=sizeof(Pointer),'This is a pointer');
  Assert (sizeof(AllByteValues[1])=2,'But each character is now 2 bytes');
  Assert (AllByteValues[1]=#0);

  // --- The RawByteString constant ---
  Assert (length(RawByteTest)=256);
  Assert (sizeof(RawByteTest)=sizeof(Pointer),'This is a pointer');
  Assert (sizeof(RawByteTest[1])=1,'Using RawByteString in a const the bytes stay as they are');
  Assert (RawByteTest[1]=#0);
  Assert (RawByteTest[1]=char(0));
  Assert (RawByteTest[1]=chr(0));
  ac:=#0;
  Assert (RawByteTest[1]=ac);
  c:=#0;
  // Assert (RawByteTest[1]=c); // This line does not compile! - AnsiChar and Char are absolutely not compatible in any way.
  Assert (ord(RawByteTest[1])=ord(c)); // This compiles nicely

  // Collect every index in 0..255 whose character in AllByteValues does not
  // have that index as its ordinal value. The expected list lies entirely in
  // 128..159: those codes were not stored as written.
  ErrorList:='';
  for i:=0 to 255 do begin
    if ord(AllByteValues[i+1])<>i then
      ErrorList:=ErrorList+IntToStr(i)+' ';
  end;
  Assert (ErrorList='128 130 131 132 133 134 135 136 137 138 139 '+
    '140 142 145 146 147 148 149 150 151 152 153 154 155 156 158 159 ',
    'These values are not saved in a string in the way you would expect!!');

  // GreekString also destroys constants with binary data
  ErrorList:='';
  for i:=0 to 255 do begin
    if ord(GreekTest[i+1])<>i then
      ErrorList:=ErrorList+IntToStr(i)+' ';
  end;
  Assert (ErrorList='136 138 140 142 152 154 156 158 159 161 162 170 175 '+
    '180 184 185 186 188 190 191 192 193 194 195 196 197 198 199 200 201 '+
    '202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 '+
    '219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 '+
    '236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 '+
    '253 254 255 ',
    'These values are not saved in a string in the way you would expect!!');

  // RawByteString stores all bytes correctly
  for i:=0 to 255 do begin
    Assert (ord(RawByteTest[i+1])=i);
  end;

  // Ansistring also stores all bytes correctly (tested on a Windows-1252 machine)
  for i:=0 to 255 do begin
    Assert (ord(AnsiTest[i+1])=i);
  end;

  // Most common ansistring stuff works as expected
  // (index 129 is the position of byte value 128, since strings are 1-based)
  Assert (ord(AnsiTest[129])=128);
  Assert (AnsiTest[129]=#128);
  Assert (copy(AnsiTest,129,1)=#128);
  Assert (MidStr(AnsiTest,129,1)=#128);
  Assert (pos(#128,AnsiTest)=129);

  // The same functions using UnicodeString: position 129 now holds 8364
  utf16:=AllByteValues;
  Assert (ord(utf16[129])=8364);
  Assert (utf16[129]=#8364);
  Assert (copy(utf16,129,1)=#8364);
  Assert (MidStr(utf16,129,1)=#8364);
  Assert (pos(#128,utf16)=129); // #128 is converted to #8364 before calling the widestring version of pos()
  Assert (pos(#8364,utf16)=129);

  // Don't copy raw binary data into an utf-16 string type!
  // The assignment yields the same mismatch list as the Unicode constant.
  utf16:=RawByteTest;
  ErrorList:='';
  for i:=0 to 255 do begin
    if ord(utf16[i+1])<>i then
      ErrorList:=ErrorList+IntToStr(i)+' ';
  end;
  Assert (ErrorList='128 130 131 132 133 134 135 136 137 138 139 140 142 145 '+
    '146 147 148 149 150 151 152 153 154 155 156 158 159 ',
    'These values are not saved in a string in the way you would expect!!');

  // Windows automatically handles unsupported byte values in strange ways.
  // Assigning the literal #128 to a Char yields 8364, to an AnsiChar yields 128.
  c:=#128;
  Assert (ord(c)<>128);
  Assert (ord(c)=8364);
  Assert (c='€');
  ac:=#128;
  Assert (ord(ac)=128);
  Assert (ord(ac)<>8364);
  Assert (ac='€','Here, ac is converted to a utf-16 string type using local character set');

  // Don't use inc() or dec() with utf-16. It works, but it's not good:
  // incrementing #127 gives ordinal 128, which differs from the literal #128.
  utf16:=#127;
  Assert (ord(utf16[1])=127);
  inc (utf16[1]);
  Assert (ord(utf16[1])=128);
  Assert (utf16[1]<>#128); // Because #128 becomes #8364
  Assert (#128=#8364); // as you can see here
end;
Conclusion: Always use RawByteString or AnsiString for binary data, and never store binary data in other string types.
You mean, aside from the obvious problem that you can only store an even number of bytes in the new double-byte string type *or* must have each byte word-aligned?
Both of which defeat the purpose?
Nonsense.
AnsiString and RawByteString both use one byte per character. Look it up.
I think the reference was to the new string type, not the old ansistring type.
On my PC (Simplified Chinese),
length(AllByteValues) = 193, not 256.
>AnsiString and RawByteString both
>use one byte per character.
Yes, that is true if you are using a one-byte-per-character code page (Latin or Cyrillic).
For hieroglyphs (e.g. Chinese or Japanese characters, which use multi-byte code pages) it is not true.