3d browser

For Lachlan

Image
Screenshot of CodeCentral item 28673

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE constants[
  <!ENTITY notice "The XML-csv format was developed by Sean B. Durkin�www.seanbdurkin.id.au">
  ]>  
<xsl:stylesheet 
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform" 
  xmlns:fn="http://www.w3.org/2005/xpath-functions" 
  xmlns:local="http://seanbdurkin.id.au/xslt/csv2xml.xsl" 
  xmlns:xs="http://www.w3.org/2001/XMLSchema" 
  xmlns:csv="http://seanbdurkin.id.au/xslt/csv.xsd"
  version="2.0" exclude-result-prefixes="xsl xs fn local">
<xsl:output indent="yes" encoding="UTF-8" />
<xsl:import-schema schema-location="http://seanbdurkin.id.au/xslt/csv.xsd"
                   use-when="system-property('xsl:is-schema-aware')='yes'"/>
<xsl:param name="url-of-csv" />

<!-- Constants -->
<xsl:variable name="quote" as="xs:string">"</xsl:variable>
<xsl:variable name="fail-open-message-xlations"> <!-- Localise here, as required. -->
  <en>Could not open CSV resource </en>
  <!-- Insert other language translations here, as needed. -->
</xsl:variable> 
<xsl:variable name="lang" as="xs:string">en</xsl:variable> <!-- Language for $fail-open-message. -->
<xsl:variable name="fail-open-message">
 <xsl:value-of select="$fail-open-message-xlations/en" />
</xsl:variable> 

<xsl:function name="local:unparsed-text-lines" as="xs:string+">
 <xsl:param name="href" as="xs:string" />
 <xsl:sequence use-when="function-available('unparsed-text-lines')" select="fn:unparsed-text-lines($href)" />
 <xsl:sequence use-when="not(function-available('unparsed-text-lines'))" select="tokenize(unparsed-text($href), '\r\n|\r|\n')[not(position()=last() and .='')]" />
</xsl:function>

<xsl:function name="local:error-node" as="node()">
 <xsl:param name="message" as="xs:string" />
 <xsl:param name="do-localise" as="xs:boolean" />
 <csv:error>
  <xsl:if test="$do-localise"> 
   <xsl:attribute name="xml:lang"><xsl:value-of select="$lang"/></xsl:attribute>
  </xsl:if> 
  <xsl:value-of select="$message" />
 </csv:error>
</xsl:function>
  
<xsl:function name="local:csv-to-xml" as="node()+">
 <xsl:param name="href" as="xs:string" />
 <csv:comma-separated-values>
 <csv:notice xml:lang="en">¬ice;</csv:notice> <!-- Notice language is fixed as per Schema. -->
       <xsl:copy-of select="local:error-node($fail-open-message, fn:true())"/>
 <xsl:choose>
   <xsl:when test="fn:unparsed-text-available($href)">
    <xsl:for-each select="local:unparsed-text-lines($href)">
   <csv:row>
     <xsl:analyze-string select="fn:concat(., ',')" regex='((("[^"]*")+[^,])*|[^,"]*),'>
      <xsl:matching-substring>
     <csv:cell>
        <xsl:if test="fn:starts-with( fn:regex-group(1), $quote)">
       <xsl:attribute name="quoted">
          <xsl:value-of select="fn:true()" />  
     </xsl:attribute>
        </xsl:if>
        <xsl:value-of select='fn:replace(fn:regex-group(1), "^""|""$|("")""", "$1" )' />
     </csv:cell>
      </xsl:matching-substring>
      <xsl:non-matching-substring>
       <xsl:copy-of select="local:error-node(., fn:false())"/>
      </xsl:non-matching-substring>
     </xsl:analyze-string>
   </csv:row>
    </xsl:for-each>
   </xsl:when>
   <xsl:otherwise>
    <xsl:copy-of select="local:error-node(fn:concat( $fail-open-message, $href), fn:true())"/>
   </xsl:otherwise>
  </xsl:choose>
 </csv:comma-separated-values>
</xsl:function>

<xsl:template match="/" name="main" mode="csv-to-xml">
 <xsl:copy-of select="local:csv-to-xml($url-of-csv)" />
</xsl:template>

<xsl:template match="csv" mode="xml-to-csv">
 <xsl:apply-templates select="row" />
</xsl:template>

<xsl:template match="row" mode="xml-to-csv">
 <xsl:apply-templates select="cell" />
 <xsl:text>
</xsl:text>
</xsl:template>

</xsl:stylesheet>


unit uCharacters;
interface
uses Generics.Collections, SysUtils, Classes;
type

UniChar = UCS4Char;
GB_CodePoint = LongWord;


IEncodingFamily = interface;
IEncoding = interface;
ICharCodec = interface;

ICharacterServices = interface
  ['{D4C7BFF8-CBE3-411A-AE29-6C281CBDE296}']
    function FindEncodingFamily( const FamilyName: string; out Family: IEncodingFamily): boolean;
    function FindEncoding( const EncodingName: string; out Encoding: IEncoding): boolean;
    function EncodingFamilies: TEnumerable<IEncodingFamily>;
    function Encodings: TEnumerable<IEncoding>;
    function DefaultEncoding: IEncoding;
    function NativeEncoding: IEncoding;
    function UTF8: IEncoding;
    function UTF16LE: IEncoding;
    function ReadBOM( Document: TStream): IEncodingFamily;
    procedure RegisterCustomEncoding( const Addend: IEncoding; const Family: IEncodingFamily);
  end;

IEncodingFamily = interface
  ['{767A1283-319B-430D-B1C4-CE320DD9B1DA}']
    function Name: string;
    function BaseEncoding: IEncoding;
    function Encodings: TEnumerable<IEncoding>;
    function BytesPerCodeUnit: integer;
    function BOM: TBytes;
  end;

//Need to support:
// ISO-8859-1
// UTF-8
// UTF-16LE
// UTF-16BE
// GB 18030-2000  Could not find 2005

IEncoding = interface
  ['{F445A9B6-B975-4BBC-BFCE-3C318FC6F84A}']
    function Name: string;
    function Family: IEncodingFamily;
    function Aliases: string;  // '|' separated list of aliase's to Name.
    function CharCodec: ICharCodec;
    function isNative: boolean;
    function CodePage: integer;
    function ToString( const Encoded: RawByteString): string;
    function FromString( const Native: string): RawByteString;
    function ReadCharactersFromStream(
      Document: TStream; RequestCount: integer;
      out ReadCount, ReadBytes: Integer): string;
    // Reads RequestCount whole unicode characters from the stream
    //  encodes the characters in the native encoding (UTF16-LE)
    //  and returns this read string.
    //  ReadCount the count of succesfully read unicode characters,
    //  and ReadBytes is the count of bytes read.
    // If the encoding is UTF-16, orphaned lead or trail surrogates
    //  are silently dropped.

    function ReadCharacterFromStream(
      Document: TStream; out ReadBytes: Integer): UniChar;
    // Similar to ReadCharactersFromStream, but only reads one
    //  character and that character is returned directly instead
    //  of being UTF-16LE encoded.
  end;

ICharCodec = interface
  ['{BC0DA141-0A0A-48EA-83E3-99C7435D9F43}']
    procedure SetSynced;
    function  isInSync: boolean;
    function  ReadUniChar( b: byte; var ch: UniChar): boolean;
    function  WriteUniChar( ch: UniChar; var b: byte): boolean;
  end;

function UniChar_to_Char( Ch: UniChar; var Lead, Trail: Char): boolean;
// Converts unicode code-point Ch into UTF-16 code units.
// Returns True iff 2 code units are returned instead of 1.
// Invalid code-points are not checked for.

function Char_to_UniChar1( Lead: Char; var ch: UniChar): boolean;
// Converts a lead/solo UTF-16 code unit into a nicode code-point Ch.
// Returns True iff a trail code unit is required. If this is the case,
//  invoke Char_to_UniChar2.

procedure Char_to_UniChar2( Trail: Char; var ch: UniChar);
// Converts a trail UTF-16 code unit into a nicode code-point Ch,
//  following a call to Char_to_UniChar1.

function CharacterServices: ICharacterServices;
// Get the CharacterServices. This is a singleton object.

function NewCharacterServices: ICharacterServices;
// Makes a fresh instance of CharacterServices, perhaps for the purpose
//  of temporary registration of a custom IEncoding.







function GBBytes_to_GB_CodePoint( GBBytes: LongRec; CodePointRangeCase: Integer): GB_CodePoint;
function GB_CodePoint_to_GBBytes( CodePoint: GB_CodePoint; out CodePointRangeCase: Integer): LongRec;
function ByteLengthOfRangeCase( CodePointRangeCase: integer): integer;
function EstimateRangeCase( ByteValue: byte; ByteIndex: Integer{0 or 1 only}): integer;
function IsValidByte( ByteValue: byte; ByteIndex: Integer; CodePointRangeCase: integer): boolean;

implementation
uses Generics.Defaults;


type
TEnumerable_Family = class( TEnumerable<IEncodingFamily>)
  protected
    FMembers: IInterfaceList;    // of IEncodingFamily
  end;

TEnumerable_Encoding = class( TEnumerable<IEncoding>)
  protected
    FMembers: IInterfaceList;    // of IEncoding
  end;

TCharacterServices = class( TInterfacedObject, ICharacterServices)
  protected
    FFamilies: IInterfaceList; // of IEncodingFamily
    FFamilyEnc: TEnumerable_Family;
    FEncodiEnc: TEnumerable_Encoding;
    FDefaultEncoding: IEncoding;
    FNativeEncoding: IEncoding;
    FUTF8: IEncoding;
    FUTF16LE: IEncoding;

  public
    function FindEncodingFamily( const FamilyName: string; out Family: IEncodingFamily): boolean;
    function FindEncoding( const EncodingName: string; out Encoding: IEncoding): boolean;
    function EncodingFamilies: TEnumerable<IEncodingFamily>;
    function Encodings: TEnumerable<IEncoding>;
    function DefaultEncoding: IEncoding;
    function NativeEncoding: IEncoding;
    function UTF8: IEncoding;
    function UTF16LE: IEncoding;
    function ReadBOM( Document: TStream): IEncodingFamily;
    procedure RegisterCustomEncoding( const Addend: IEncoding; const Family: IEncodingFamily);

    constructor Create;
    destructor  Destroy; override;
  end;

TEncodingFamily = class abstract( TInterfacedObject, IEncodingFamily)
  protected
    FBaseEncoding: IEncoding;
    FEncodings: TEnumerable_Encoding;
    FMembers: IInterfaceList;    // of IEncoding
    FBytesPerCodeUnit: integer;
    FBOM: TBytes;

    function Name: string;                            virtual; abstract;
    function BaseEncoding: IEncoding;                 virtual;
    function Encodings: TEnumerable<IEncoding>;
    function BytesPerCodeUnit: integer;
    function BOM: TBytes;

  public
    constructor Create;
    destructor  Destroy; override;
  end;

TEncoding = class abstract( TInterfacedObject, IEncoding)
  protected
    FFamily: pointer;{ cast as IEncodingFamily}
    FAliases: string;
    FCodePage: integer;

    function Name: string;                                       virtual; abstract;
    function Family: IEncodingFamily;
    function Aliases: string;
    function CharCodec: ICharCodec;                              virtual; abstract;
    function isNative: boolean;                                  virtual;
    function CodePage: integer;
    function ToString( const Encoded: RawByteString): string;    virtual;
    function FromString( const Native: string): RawByteString;   virtual;
    function ReadCharactersFromStream(
      Document: TStream; RequestCount: integer;
      out ReadCount, ReadBytes: Integer): string;
    function ReadCharacterFromStream(
      Document: TStream; out ReadBytes: Integer): UniChar;

  public
    constructor Create;
    destructor  Destroy; override;
  end;

TCharCodec = class abstract( TInterfacedObject, ICharCodec)
  protected
    procedure SetSynced;                                         virtual;
    function  isInSync: boolean;                                 virtual;
    function  ReadUniChar( b: byte; var ch: UniChar): boolean;   virtual; abstract;
    function  WriteUniChar( ch: UniChar; var b: byte): boolean;  virtual; abstract;
  end;


TUTF16LECodec = class( TCharCodec)
  protected
    FByteIdx: integer;
    FBuff: LongRec;
    FNeedBytes: integer;
    procedure SetSynced;                                         override;
    function  isInSync: boolean;                                 override;
    function  ReadUniChar( b: byte; var ch: UniChar): boolean;   override;
    function  WriteUniChar( ch: UniChar; var b: byte): boolean;  override;
  end;


TUTF8Codec = class( TCharCodec)
  protected
    FByteIdx: integer;
    FBuff: LongRec;
    FNeedBytes: integer;
    procedure SetSynced;                                         override;
    function  isInSync: boolean;                                 override;
    function  ReadUniChar( b: byte; var ch: UniChar): boolean;   override;
    function  WriteUniChar( ch: UniChar; var b: byte): boolean;  override;
  end;




TUTF8Family = class ( TEncodingFamily)
  protected
    function Name: string;                          override;
    function BaseEncoding: IEncoding;               override;
  end;

TUTF16Family = class abstract( TUTF8Family)
  protected
    function Name: string;                          override;
    function BaseEncoding: IEncoding;               override;
  end;

TUTF16LEFamily = class ( TUTF16Family)
  protected
    function BaseEncoding: IEncoding;               override;
  end;

TUTF16BEFamily = class ( TUTF16Family)
  protected
    function BaseEncoding: IEncoding;               override;
  end;

TUTF8Encoding = class ( TEncoding)
  protected
    function Name: string;                                       override;
    function CharCodec: ICharCodec;                              override;
    function isNative: boolean;                                  override;
    function ToString( const Encoded: RawByteString): string;    override;
    function FromString( const Native: string): RawByteString;   override;
  end;

TUTF16LEEncoding = class ( TEncoding)
  protected
    function Name: string;                                       override;
    function CharCodec: ICharCodec;                              override;
    function isNative: boolean;                                  override;
    function ToString( const Encoded: RawByteString): string;    override;
    function FromString( const Native: string): RawByteString;   override;
  end;

TUTF16BEEncoding = class ( TEncoding)
  protected
    function Name: string;                                       override;
    function CharCodec: ICharCodec;                              override;
    function isNative: boolean;                                  override;
    function ToString( const Encoded: RawByteString): string;    override;
    function FromString( const Native: string): RawByteString;   override;
  end;


ISO_8859_1 = class sealed( TEncoding)
  protected
    function Name: string;                                       override;
    function CharCodec: ICharCodec;                              override;
    function isNative: boolean;                                  override;
    function ToString( const Encoded: RawByteString): string;    override;
    function FromString( const Native: string): RawByteString;   override;
  end;

// Static data for stock families.
RStockFamilyStaticDatum = record
  Family       : TClass;
  Name         : string;
  BOM          : RawByteString;
  nCodeUnit    : integer;  // Bytes per code unit.
  nMaxCodeUnits: integer;  // Maximum code unit per unicode character.
end;

const
 StockFamilyData: array[0..0] of RStockFamilyStaticDatum = (
  (Family: nil; Name: ''; BOM: ''; nCodeUnit: 0; nMaxCodeUnits: 0)
  );

type
// Static data for stock Encodings.
RStockEncodingStaticDatum = record
  Family       : TClass;
  Encoding     : TClass;
  Name         : string;
  nMaxCodeUnits: integer;  // Maximum code unit per unicode character.
  Aliases      : string;
  CodePage     : integer;  // 0 = no code page applicable.
  CanAutoSync  : Boolean;
end;

const
 StockEncodingData: array[0..0] of RStockEncodingStaticDatum = (
  (Family: nil; Encoding: nil; Name: ''; nMaxCodeUnits: 0; Aliases: ''; CodePage: 0; CanAutoSync: False)
  );


  ReplacementCharacter: UniChar = $00FFFD;
  ReplacementChar     :    Char =  #$FFFD;


function IsTailSurrogate( ch: Char): boolean; inline;
begin
result := (ch >= #$D800) and (ch <= #$DBFF)
end;

function IsLeadSurrogate( ch: Char): boolean; inline;
begin
result := (ch >= #$DC00) and (ch <= #$DFFF)
end;

function UniChar_to_Char( Ch: UniChar; var Lead, Trail: Char): boolean;
begin
end;

function Char_to_UniChar1( Lead: Char; var ch: UniChar): boolean;
begin
end;

procedure Char_to_UniChar2( Trail: Char; var ch: UniChar);
begin
end;

function CharacterServices: ICharacterServices;
begin
end;

function NewCharacterServices: ICharacterServices;
begin
end;


//http://www.iana.org/assignments/character-setswindows-1251  http://tlt.its.psu.edu/suggestions/international/bylanguage/index.html
//http://www.sagehill.net/docbookxsl/CharEncoding.html
//
//u = the unicode code point. Can be stored in 32 bits.
//l = lead word
//t = trail word
// u: [$000000..$10FFFF]-[$00D800..$00DFFF];
//
//if u >= $010000:
// l := ((u - $010000) shr 10     ) + $D800
// l == [$D800..$DBFF]
// t := ((u - $010000) and $0003FF) + $DC00
// t == [$DC00..0xDFFF]
//
//if u < $010000:
// l := u
// l == [$0000..$D7FF]+[$E000..$FFFF];
// (no t)
//
//if (l <= $D7FF) or (l >= $E000):
//  (no t)
//  u := l
//
//if (l >= $D800) and (l <= $DBFF):
// u := ((l - $D800) shl 10) + (t - $DC00) + $010000
{ TCharacterServices }

constructor TCharacterServices.Create;
begin

end;

function TCharacterServices.DefaultEncoding: IEncoding;
begin

end;

destructor TCharacterServices.Destroy;
begin

  inherited;
end;

function TCharacterServices.EncodingFamilies: TEnumerable<IEncodingFamily>;
begin

end;

function TCharacterServices.Encodings: TEnumerable<IEncoding>;
begin

end;

function TCharacterServices.FindEncoding(const EncodingName: string;
  out Encoding: IEncoding): boolean;
begin

end;

function TCharacterServices.FindEncodingFamily(const FamilyName: string;
  out Family: IEncodingFamily): boolean;
begin

end;

function TCharacterServices.NativeEncoding: IEncoding;
begin

end;

function TCharacterServices.ReadBOM( Document: TStream): IEncodingFamily;
begin

end;

procedure TCharacterServices.RegisterCustomEncoding(
  const Addend: IEncoding; const Family: IEncodingFamily);
begin

end;

function TCharacterServices.UTF16LE: IEncoding;
begin

end;

function TCharacterServices.UTF8: IEncoding;
begin

end;

{ TEncodingFamily }

function TEncodingFamily.BaseEncoding: IEncoding;
begin

end;

function TEncodingFamily.BOM: TBytes;
begin

end;

function TEncodingFamily.BytesPerCodeUnit: integer;
begin

end;

constructor TEncodingFamily.Create;
begin

end;

destructor TEncodingFamily.Destroy;
begin

  inherited;
end;

function TEncodingFamily.Encodings: TEnumerable<IEncoding>;
begin

end;

{ TEncoding }

function TEncoding.Aliases: string;
begin

end;

function TEncoding.CodePage: integer;
begin

end;

constructor TEncoding.Create;
begin

end;

destructor TEncoding.Destroy;
begin

  inherited;
end;

function TEncoding.Family: IEncodingFamily;
begin

end;

function TEncoding.FromString( const Native: string): RawByteString;
begin

end;

function TEncoding.isNative: boolean;
begin
result := False
end;

function TEncoding.ReadCharacterFromStream(
  Document: TStream;
  out ReadBytes: Integer): UniChar;
var
  Codec: ICharCodec;
  b: byte;
  Ok: boolean;
begin
Codec := CharCodec;
ReadBytes := 0;
repeat
  Ok := Document.Read( b, 1) = 1;
  if Ok then
    Inc( ReadBytes)
until (not Ok) or (not Codec.ReadUniChar( b, result));
if Ok then exit;
if ReadBytes <> 0 then
    result := ReplacementCharacter
  else
    result := 0
end;

function TEncoding.ReadCharactersFromStream(
  Document: TStream;
  RequestCount: integer; out ReadCount, ReadBytes: Integer): string;
var
  Codec: ICharCodec;
  b: byte;
  Ok: boolean;
  ch: unichar;
  Lead, Trail: Char;
  Idx: integer;
  RequestIndex: integer;
  BytesReadThisChar: integer;
begin
Codec     := CharCodec;
ReadBytes := 0;
Idx       := 1;
ReadCount := 0;
SetLength( result, RequestCount*2);  // Estimate average 2 bytes per character.
for RequestIndex := 1 to RequestCount do
  begin
  BytesReadThisChar := 0;
  repeat
    Ok := Document.Read( b, 1) = 1;
    if Ok then
      Inc( BytesReadThisChar)
  until (not Ok) or (not Codec.ReadUniChar( b, ch));
  Inc( ReadBytes, BytesReadThisChar);
  if BytesReadThisChar = 0 then break;
  if not Ok then
    ch := ReplacementCharacter; // Partial character at the end of the stream.
  Inc( ReadCount);
  if UniChar_to_Char( Ch, Lead, Trail) then
      begin
      if Idx >= Length( result) then
        SetLength( result, Idx+1);
      result[Idx] := Lead;
      Inc( Idx);
      result[Idx] := Trail;
      Inc( Idx)
      end
    else
      begin
      if Idx > Length( result) then
        SetLength( result, Idx);
      result[Idx] := Lead;
      Inc( Idx)
      end
  end;
SetLength( result, Idx - 1)
end;

function TEncoding.ToString( const Encoded: RawByteString): string;
begin

end;

{ TCharCodec }

function TCharCodec.isInSync: boolean;
begin
result := True
end;

procedure TCharCodec.SetSynced;
begin
end;

{ TUTF8Family }

function TUTF8Family.BaseEncoding: IEncoding;
begin

end;

function TUTF8Family.Name: string;
begin

end;

{ TUTF16Family }

function TUTF16Family.BaseEncoding: IEncoding;
begin

end;

function TUTF16Family.Name: string;
begin

end;

{ TUTF16LEFamily }

function TUTF16LEFamily.BaseEncoding: IEncoding;
begin

end;

{ TUTF16BEFamily }

function TUTF16BEFamily.BaseEncoding: IEncoding;
begin

end;

{ TUTF8Encoding }

function TUTF8Encoding.CharCodec: ICharCodec;
begin
result := TUTF8Codec.Create
end;

function TUTF8Encoding.FromString( const Native: string): RawByteString;
begin

end;

function TUTF8Encoding.isNative: boolean;
begin
result := False
end;

function TUTF8Encoding.Name: string;
begin
result := 'utf-8'
end;

function TUTF8Encoding.ToString( const Encoded: RawByteString): string;
begin

end;

{ TUTF16LEEncoding }

function TUTF16LEEncoding.CharCodec: ICharCodec;
begin

end;

function TUTF16LEEncoding.FromString(const Native: string): RawByteString;
begin

end;

function TUTF16LEEncoding.isNative: boolean;
begin
result := True
end;

function TUTF16LEEncoding.Name: string;
begin
result := 'utf-16le'
end;

function TUTF16LEEncoding.ToString(const Encoded: RawByteString): string;
begin

end;

{ TUTF16BEEncoding }

function TUTF16BEEncoding.CharCodec: ICharCodec;
begin

end;

function TUTF16BEEncoding.FromString(const Native: string): RawByteString;
begin

end;

function TUTF16BEEncoding.isNative: boolean;
begin

end;

function TUTF16BEEncoding.Name: string;
begin

end;

function TUTF16BEEncoding.ToString(const Encoded: RawByteString): string;
begin

end;

{ ISO_8859_1 }

function ISO_8859_1.CharCodec: ICharCodec;
begin

end;

function ISO_8859_1.FromString(const Native: string): RawByteString;
begin

end;

function ISO_8859_1.isNative: boolean;
begin

end;

function ISO_8859_1.Name: string;
begin

end;

function ISO_8859_1.ToString(const Encoded: RawByteString): string;
begin

end;

{ TUTF16LECodec }

function TUTF16LECodec.ReadUniChar( b: byte; var ch: UniChar): boolean;
begin
FBuff.Bytes[FByteIdx] := b;
Inc( FByteIdx);
if FByteIdx = 2 then
  begin
  if IsTailSurrogate( Char( FBuff.Words[0])) then
    begin
    ch := ReplacementCharacter;
    result := False;
    Exit
    end;
  if Char_to_UniChar1( Char( FBuff.Words[0]), ch) then
      FNeedBytes := 2
    else
      FNeedBytes := 0;
  end;
result := FByteIdx < (FNeedBytes + 2);
if result then exit;
if FNeedBytes = 2 then
  begin
  if not IsTailSurrogate( Char( FBuff.Words[0])) then
    begin
    ch := ReplacementCharacter;
    result := False;
    Exit
    end;
  Char_to_UniChar2( Char( FBuff.Words[1]), ch);
  end;
FByteIdx   := 0;
FNeedBytes := 0;
Cardinal( FBuff) := 0
end;


function TUTF16LECodec.WriteUniChar( ch: UniChar; var b: byte): boolean;
begin
if FByteIdx = 0 then
  if UniChar_to_Char( Ch,  Char( FBuff.Words[0]),  Char( FBuff.Words[0])) then
      FNeedBytes := 2
    else
      FNeedBytes := 0;
b := LongRec(ch).Bytes[FByteIdx];
Inc( FByteIdx);
result := FByteIdx < (FNeedBytes + 2);
if result then exit;
FByteIdx   := 0;
FNeedBytes := 0;
Cardinal( FBuff) := 0
end;

function TUTF16LECodec.isInSync: boolean;
begin
result := FByteIdx = 0
end;

procedure TUTF16LECodec.SetSynced;
begin
FByteIdx   := 0;
FNeedBytes := 0;
Cardinal( FBuff) := 0
end;


{ TUTF8Codec }

function TUTF8Codec.ReadUniChar( b: byte; var ch: UniChar): boolean;
begin           // he replacement character "�" (U+FFFD)
FBuff.Bytes[FByteIdx] := b;
Inc( FByteIdx);
if FByteIdx = 1 then
    begin
    if b <= $7F then
        FNeedBytes := 0 // single byte
      else if b <= $BF then
        begin
        // continuation
        ch := ReplacementCharacter;
        result := False;
        Exit
        end
      else if b <= $DF then
        FNeedBytes := 1
      else if b <= $EF then
        FNeedBytes := 2
      else if b <= $F7 then
        FNeedBytes := 3
      else
        begin
        // Invalid encoding.
        ch := ReplacementCharacter;
        result := False;
        Exit
        end
    end
  else if (b > $BF) or (b <= $DF) then
        begin
        ch := ReplacementCharacter;
        Result := False;
        exit
        end;
result := FByteIdx < (FNeedBytes + 1);
if result then exit;
if not result then
  begin
  ch := 0;
  case FNeedBytes of
    0: ch := FBuff.Bytes[0];
    1: ch := (((FBuff.Bytes[0] and $1F) shl  6) or  (FBuff.Bytes[1] and $3F));
    2: ch :=  ((FBuff.Bytes[0] and $0F) shl 12) or ((FBuff.Bytes[1] and $3F) shl  6) or  (FBuff.Bytes[2] and $3F);
    3: ch :=  ((FBuff.Bytes[0] and $07) shl 18) or ((FBuff.Bytes[1] and $3F) shl 12) or ((FBuff.Bytes[2] and $3F) shl 6) or (FBuff.Bytes[3] and $3F);
    end
  end;
FByteIdx   := 0;
FNeedBytes := 0;
Cardinal( FBuff) := 0
end;

function TUTF8Codec.WriteUniChar( ch: UniChar; var b: byte): boolean;
begin
Cardinal( FBuff) := 0;
if ch <= $00007F then
    begin
    FNeedBytes := 0; // single byte
    FBuff.Bytes[0] := ch
    end
  else if ch <= $0007FF then
    begin
    FNeedBytes := 1; // 2 bytes
    FBuff.Bytes[0] :=  (ch shr  6 ) or $C0;
    FBuff.Bytes[1] := ((ch and $3F) and $C0) or $80
    end
  else  if ch <= $00FFFF then
    begin
    FNeedBytes := 2; // 3 bytes
    FBuff.Bytes[0] := ( ch shr 12 ) or $E0;
    FBuff.Bytes[1] := ((ch shr  6 ) and $C0) or $80;
    FBuff.Bytes[2] := ((ch and $3F) and $C0) or $80
    end
  else
    begin
    FNeedBytes := 3; // 4 bytes
    FBuff.Bytes[0] := ((ch shr 18 ) and $F8) or $F0;
    FBuff.Bytes[1] := ((ch shr 12 ) and $C0) or $80;
    FBuff.Bytes[2] := ((ch shr  6 ) and $C0) or $80;
    FBuff.Bytes[3] := ((ch and $3F) and $C0) or $80
    end;
//http://www.herongyang.com/Unicode/index.html
// and http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/gb-18030-2000.xml
// table:
//  range:
//   unicode code-point
//   gb code point (mapped to/from 4 byte)
//   range length
//1        0x00 - 0x7F                                           : 128 MSB             =     128   cum:     128
//2        0x81 - 0xFE   0x40 - 0x7E                             : 126 *  63           =    7938   cum:    8066
//2        0x81 - 0xFE   0x80 - 0xFE                             : 126 * 127           =   16002   cum:   24068
//4        0x81 - 0xFE   0x30 - 0x39   0x81 - 0xFE   0x30 - 0x39 : 126 * 10 * 126 * 10 = 1587600   cum: 1611668
end;

//                 Also read http://www.unicode.org/reports/tr22/
//                 and http://site.icu-project.org/charts/charset
//                 http://icu-project.org/docs/papers/gb18030.html
//
//xmlns:http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/gb-18030-2000
//url=http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/gb-18030-2000.xml
//file=C:\TEMP\Projects\Third Party\PascalRegex\gb-18030-2000.xml
//
///characterMapping/assignments/a/@u           : string  regex=[0-9A-F]{4,6}
///characterMapping/assignments/a/@b           : string* regex=[0-9A-F]{2}
///characterMapping/assignments/a/range/@uFirst: string  regex=[0-9A-F]{4,6}
///characterMapping/assignments/a/range/@uLast : string  regex=[0-9A-F]{4,6}
///characterMapping/assignments/a/range/@bFirst: string* regex=[0-9A-F]{2}
///characterMapping/assignments/a/range/@bLast : string* regex=[0-9A-F]{2}




function GBBytes_to_GB_CodePoint( GBBytes: LongRec; CodePointRangeCase: Integer): GB_CodePoint;
begin
result := 0;
case CodePointRangeCase of
  0: result :=   GBBytes.Bytes[0];
  1: result := ((GBBytes.Bytes[0] - $81) *    126) +  (GBBytes.Bytes[1] - $40)                                                                        +   128;
  2: result := ((GBBytes.Bytes[0] - $81) *    126) +  (GBBytes.Bytes[1] - $80)                                                                        +  8066;
  3: result := ((GBBytes.Bytes[0] - $81) * 158760) + ((GBBytes.Bytes[1] - $30) * 1260)  + ((GBBytes.Bytes[2] - $81) * 126) + (GBBytes.Bytes[3] - $30) + 24068;
  end;
end;


function GB_CodePoint_to_GBBytes( CodePoint: GB_CodePoint; out CodePointRangeCase: Integer): LongRec;
begin
cardinal( result) := 0;
if CodePoint < 128 then
    begin
    CodePointRangeCase := 0;
    Result.Bytes[0] := CodePoint
    end
  else if CodePoint < 8066 then
    begin
    CodePointRangeCase := 1;
    Dec( CodePoint, 128);
    Result.Bytes[0] := (CodePoint div 126) + $81;
    Result.Bytes[1] := (CodePoint mod 126) + $40
    end
  else if CodePoint < 24068 then
    begin
    CodePointRangeCase := 2;
    Dec( CodePoint, 8066);
    Result.Bytes[0] := (CodePoint div 126) + $81;
    Result.Bytes[1] := (CodePoint mod 126) + $80
    end
  else
    begin
    CodePointRangeCase := 3;
    Dec( CodePoint, 24068);
    result.Bytes[2] := (CodePoint div 10) + $81;
    result.Bytes[3] := (CodePoint mod 10) + $30;
    CodePoint := result.Bytes[2];
    result.Bytes[0] := (CodePoint div 126) + $81;
    result.Bytes[1] := (CodePoint mod 126) + $30
    end
end;

function ByteLengthOfRangeCase( CodePointRangeCase: integer): integer;
begin
case CodePointRangeCase of
  0:   result := 1;
  1,2: result := 2;
  3:   result := 4;
  end;
end;


function EstimateRangeCase( ByteValue: byte; ByteIndex: Integer{0 or 1 only}): integer;
begin
if ByteIndex = 0 then
    begin
    if ByteValue < 128 then
        result := 0
      else
        result := 1
    end
  else if ByteIndex = 1 then
    begin
    if ByteValue <= $30 then
       result := 3
      else if ByteValue <= $7E then
        result := 1
      else if ByteValue <= $FE then
        result := 2
    end;
end;

function IsValidByte( ByteValue: byte; ByteIndex: Integer; CodePointRangeCase: integer): boolean;
begin
case CodePointRangeCase of
  0: result :=  ByteValue < 128;
  1,2: if ByteIndex = 0 then
         result := (ByteValue >= $81) and (ByteValue <= $FE)
       else
         result := (ByteValue >= $40) and (ByteValue <= $FE) and (ByteValue <> $7F);
  3: case ByteIndex of
       0: result := (ByteValue >= $81) and (ByteValue <= $FE);
       1: result := (ByteValue >= $30) and (ByteValue <= $39);
       2: result := (ByteValue >= $81) and (ByteValue <= $FE);
       3: result := (ByteValue >= $30) and (ByteValue <= $39);
     end;
  end;
end;

function TUTF8Codec.isInSync: boolean;
begin
result := FByteIdx = 0
end;

procedure TUTF8Codec.SetSynced;
begin
FByteIdx   := 0;
FNeedBytes := 0;
Cardinal( FBuff) := 0
end;


end.




Show php error messages
 
PHP (5.2.17) NOTICE (E_NOTICE):
File: lib/wiki-plugins/wikiplugin_code.php
Line: 196
Type: Undefined variable: colors
PHP (5.2.17) NOTICE (E_NOTICE):
File: lib/wiki-plugins/wikiplugin_code.php
Line: 196
Type: Undefined variable: colors