From ff2817f8219345990cb678d5846a2b895db2cec9 Mon Sep 17 00:00:00 2001 From: ScottPJones Date: Sat, 13 Jun 2015 14:07:29 -0400 Subject: [PATCH 1/2] Update to merge in codeunit methods --- src/Encodings.jl | 98 ++++++++++++++++++++++++++++++------------------ 1 file changed, 61 insertions(+), 37 deletions(-) diff --git a/src/Encodings.jl b/src/Encodings.jl index ec904c8..96f1750 100644 --- a/src/Encodings.jl +++ b/src/Encodings.jl @@ -1,45 +1,69 @@ +#= +module Encodings + +export Encoding +export Binary, ASCII, Latin1, UTF8 # 7/8-bit encodings +export UCS2, UCS2LE, UCS2BE, UCS2OE # 16-bit encodings (16-bit subset of Unicode) +export UTF16, UTF16LE, UTF16BE, UTF16OE # 16-bit encodings +export UTF32, UTF32LE, UTF32BE, UTF32OE # 32-bit encodings +export BIG_ENDIAN +export native_endian, big_endian, codeunit +=# + abstract Encoding abstract DirectIndexedEncoding <: Encoding -immutable ASCII <: DirectIndexedEncoding end -immutable Latin1 <: DirectIndexedEncoding end - -immutable UTF8 <: Encoding end -immutable UTF16LE <: Encoding end -immutable UTF32LE <: DirectIndexedEncoding end -immutable UCS2LE <: DirectIndexedEncoding end - -immutable UTF16BE <: Encoding end -immutable UTF32BE <: DirectIndexedEncoding end -immutable UCS2BE <: DirectIndexedEncoding end - -if ENDIAN_BOM == 0x01020304 - typealias UTF16 UTF16BE - typealias UTF32 UTF32BE - typealias UCS2 UCS2BE - typealias UTF16OE UTF16LE - typealias UTF32OE UTF32LE - typealias UCS2OE UCS2LE -elseif ENDIAN_BOM == 0x04030201 - typealias UTF16 UTF16LE - typealias UTF32 UTF32LE - typealias UCS2 UCS2LE - typealias UTF16OE UTF16BE - typealias UTF32OE UTF32BE - typealias UCS2OE UCS2BE +abstract Binary <: DirectIndexedEncoding +abstract ASCII <: DirectIndexedEncoding +abstract Latin1 <: DirectIndexedEncoding + +abstract UTF8 <: Encoding +abstract UTF16 <: Encoding +abstract UTF32 <: DirectIndexedEncoding +abstract UCS2 <: DirectIndexedEncoding + +# Opposite endian encodings of 16-bit and 32-bit encodings +abstract UTF16OE <: UTF16 +abstract UTF32OE <: UTF32 +abstract UCS2OE <: UCS2 + +# This is easier to use (and not get the ordering mixed up) than ENDIAN_BOM +const BIG_ENDIAN = reinterpret(UInt32,UInt8[1:4;])[1] == 0x01020304 + +if BIG_ENDIAN + abstract UTF16BE <: UTF16 + abstract UTF32BE <: UTF32 + abstract UCS2BE <: UCS2 + abstract UTF16LE <: UTF16OE + abstract UTF32LE <: UTF32OE + abstract UCS2LE <: UCS2OE +else + abstract UTF16LE <: UTF16 + abstract UTF32LE <: UTF32 + abstract UCS2LE <: UCS2 + abstract UTF16BE <: UTF16OE + abstract UTF32BE <: UTF32OE + abstract UCS2BE <: UCS2OE +end + +native_endian{E <: Encoding}(::Type{E}) = true +native_endian{E <: UTF16OE}(::Type{E}) = false +native_endian{E <: UTF32OE}(::Type{E}) = false +native_endian{E <: UCS2OE}(::Type{E}) = false + +if BIG_ENDIAN +big_endian{E <: Encoding}(::Type{E}) = native_endian(E) else - error("seriously? what is this machine?") +big_endian{E <: Encoding}(::Type{E}) = !native_endian(E) end -codeunit(::Type{ASCII}) = UInt8 -codeunit(::Type{Latin1}) = UInt8 -codeunit(::Type{UTF8}) = UInt8 -codeunit(::Type{UTF16LE}) = UInt16 -codeunit(::Type{UTF32LE}) = UInt32 -codeunit(::Type{UCS2LE}) = UInt16 -codeunit(::Type{UTF16BE}) = UInt16 -codeunit(::Type{UTF32BE}) = UInt32 -codeunit(::Type{UCS2BE}) = UInt16 +codeunit{E <: ASCII}(::Type{E}) = UInt8 +codeunit{E <: Latin1}(::Type{E}) = UInt8 +codeunit{E <: UTF8}(::Type{E}) = UInt8 +codeunit{E <: UTF16}(::Type{E}) = UInt16 +codeunit{E <: UCS2}(::Type{E}) = UInt16 +codeunit{E <: UTF32}(::Type{E}) = UInt32 # size of code unit in bytes -Base.sizeof{E<:Encoding}(::Type{E}) = sizeof(codeunit(E)) +Base.sizeof{E <: Encoding}(::Type{E}) = sizeof(codeunit(E)) +#end From 003d02ce9d9129cdde5110496be7e6113ff0ca53 Mon Sep 17 00:00:00 2001 From: ScottPJones Date: Sat, 13 Jun 2015 10:56:33 -0400 Subject: [PATCH 2/2] Make encodings abstract, add native_endian/big_endian methods --- src/Encodings.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Encodings.jl b/src/Encodings.jl index 96f1750..dfd2544 100644 --- a/src/Encodings.jl +++ b/src/Encodings.jl @@ -66,4 +66,5 @@ codeunit{E <: UTF32}(::Type{E}) = UInt32 # size of code unit in bytes Base.sizeof{E <: Encoding}(::Type{E}) = sizeof(codeunit(E)) + #end