Skip to content

Commit 182a2d9

Browse files
committed
add AbstractChar supertype of Char
1 parent fc4071f commit 182a2d9

File tree

41 files changed

+494
-414
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+494
-414
lines changed

NEWS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -452,6 +452,9 @@ Library improvements
452452
* The function `thisind(s::AbstractString, i::Integer)` returns the largest valid index
453453
less than or equal to `i` in the string `s` or `0` if no such index exists ([#24414]).
454454

455+
* `Char` is now a subtype of `AbstractChar`, and most of the functions that
456+
take character arguments now accept any `AbstractChar`.
457+
455458
* `Irrational` is now a subtype of `AbstractIrrational` ([#24245]).
456459

457460
* Introduced the `empty` function, the functional pair to `empty!` which returns a new,

base/arrayshow.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ methods. By default returns a string of the same width as original with a
3939
centered cdot, used in printing of structural zeros of structured matrices.
4040
Accept keyword args `c` for alternate single character marker.
4141
"""
42-
function replace_with_centered_mark(s::AbstractString;c::Char = '⋅')
42+
function replace_with_centered_mark(s::AbstractString;c::AbstractChar = '⋅')
4343
N = length(s)
4444
return join(setindex!([" " for i=1:N],string(c),ceil(Int,N/2)))
4545
end

base/boot.jl

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ export
143143
Signed, Int, Int8, Int16, Int32, Int64, Int128,
144144
Unsigned, UInt, UInt8, UInt16, UInt32, UInt64, UInt128,
145145
# string types
146-
Char, AbstractString, String, IO,
146+
AbstractChar, Char, AbstractString, String, IO,
147147
# errors
148148
ErrorException, BoundsError, DivideError, DomainError, Exception,
149149
InterruptException, InexactError, OutOfMemoryError, ReadOnlyMemoryError,
@@ -177,7 +177,8 @@ primitive type Float32 <: AbstractFloat 32 end
177177
primitive type Float64 <: AbstractFloat 64 end
178178

179179
#primitive type Bool <: Integer 8 end
180-
primitive type Char 32 end
180+
abstract type AbstractChar end
181+
primitive type Char <: AbstractChar 32 end
181182

182183
primitive type Int8 <: Signed 8 end
183184
#primitive type UInt8 <: Unsigned 8 end
@@ -460,7 +461,7 @@ function write(io::IO, x::String)
460461
end
461462

462463
show(io::IO, @nospecialize x) = ccall(:jl_static_show, Cvoid, (Ptr{Cvoid}, Any), io_pointer(io), x)
463-
print(io::IO, x::Char) = ccall(:jl_uv_putc, Cvoid, (Ptr{Cvoid}, Char), io_pointer(io), x)
464+
print(io::IO, x::AbstractChar) = ccall(:jl_uv_putc, Cvoid, (Ptr{Cvoid}, Char), io_pointer(io), x)
464465
print(io::IO, x::String) = (write(io, x); nothing)
465466
print(io::IO, @nospecialize x) = show(io, x)
466467
print(io::IO, @nospecialize(x), @nospecialize a...) = (print(io, x); print(io, a...))
@@ -701,9 +702,10 @@ UInt32(x::BuiltinInts) = toUInt32(x)::UInt32
701702
UInt64(x::BuiltinInts) = toUInt64(x)::UInt64
702703
UInt128(x::BuiltinInts) = toUInt128(x)::UInt128
703704

704-
Char(x::Number) = Char(UInt32(x))
705-
Char(x::Char) = x
706-
(::Type{T})(x::Char) where {T<:Number} = T(UInt32(x))
705+
(::Type{T})(x::Number) where {T<:AbstractChar} = T(UInt32(x))
706+
(::Type{AbstractChar})(x::Number) = Char(x)
707+
(::Type{T})(x::AbstractChar) where {T<:Union{Number,AbstractChar}} = T(UInt32(x))
708+
(::Type{T})(x::T) where {T<:AbstractChar} = x
707709

708710
(::Type{T})(x::T) where {T<:Number} = x
709711

base/char.jl

Lines changed: 83 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,49 @@
11
# This file is a part of Julia. License is MIT: https://julialang.org/license
22

3-
struct InvalidCharError <: Exception
4-
char::Char
3+
"""
4+
The `AbstractChar` type is the supertype of all character implementations
5+
in Julia. A character represents a Unicode code point, and can be converted
6+
to/from `UInt32` in order to obtain the numerical value of the code point.
7+
These numerical values determine how characters are compared with `<` and `==`,
8+
for example.
9+
10+
A given `AbstractChar` subtype may be capable of representing only a subset
11+
of Unicode, in which case conversion from an unsupported `UInt32` value
12+
may throw an error. Conversely, the built-in [`Char`](@ref) type represents
13+
a *superset* of Unicode (in order to losslessly encode invalid byte streams),
14+
in which case conversion of a non-Unicode value *to* `UInt32` throws an error.
15+
The [`isvalid`](@ref) function can be used to check which codepoints are
16+
representable in a given `AbstractChar` type.
17+
18+
Internally, an `AbstractChar` type may use a variety of encodings. Conversion
19+
to `UInt32` will not reveal this encoding because it always returns the
20+
Unicode value of the character. (Typically, the raw encoding can be obtained
21+
via [`reinterpret`](@ref).)
22+
"""
23+
AbstractChar
24+
25+
"""
26+
Char(c::Union{Number,AbstractChar})
27+
28+
`Char` is a 32-bit [`AbstractChar`](@ref) type that is the default representation
29+
of characters in Julia. `Char` is the type used for character literals like `'x'`
30+
and it is also the element type of [`String`](@ref).
31+
32+
In order to losslessly represent arbitrary byte streams stored in a `String`,
33+
a `Char` value may store information that cannot be converted to a Unicode
34+
codepoint — converting such a `Char` to `UInt32` will throw an error.
35+
The [`isvalid(c::Char)`](@ref) function can be used to query whether `c`
36+
represents a valid Unicode character.
37+
"""
38+
Char
39+
40+
struct InvalidCharError{T<:AbstractChar} <: Exception
41+
char::T
542
end
643
struct CodePointError <: Exception
744
code::Integer
845
end
9-
@noinline invalid_char(c::Char) = throw(InvalidCharError(c))
46+
@noinline invalid_char(c::AbstractChar) = throw(InvalidCharError(c))
1047
@noinline code_point_err(u::UInt32) = throw(CodePointError(u))
1148

1249
function ismalformed(c::Char)
@@ -24,6 +61,11 @@ function isoverlong(c::Char)
2461
is_overlong_enc(u)
2562
end
2663

64+
# fallback: other AbstractChar types, by default, are assumed
65+
# not to support malformed or overlong encodings.
66+
ismalformed(c::AbstractChar) = false
67+
isoverlong(c::AbstractChar) = false
68+
2769
function UInt32(c::Char)
2870
# TODO: use optimized inline LLVM
2971
u = reinterpret(UInt32, c)
@@ -69,50 +111,57 @@ function Char(b::Union{Int8,UInt8})
69111
0 ≤ b ≤ 0x7f ? reinterpret(Char, (b % UInt32) << 24) : Char(UInt32(b))
70112
end
71113

72-
convert(::Type{Char}, x::Number) = Char(x)
73-
convert(::Type{T}, x::Char) where {T<:Number} = T(x)
114+
convert(::Type{AbstractChar}, x::Number) = Char(x) # default to Char
115+
convert(::Type{T}, x::Number) where {T<:AbstractChar} = T(x)
116+
convert(::Type{T}, x::AbstractChar) where {T<:Number} = T(x)
74117

75-
rem(x::Char, ::Type{T}) where {T<:Number} = rem(UInt32(x), T)
118+
rem(x::AbstractChar, ::Type{T}) where {T<:Number} = rem(UInt32(x), T)
76119

77120
typemax(::Type{Char}) = reinterpret(Char, typemax(UInt32))
78121
typemin(::Type{Char}) = reinterpret(Char, typemin(UInt32))
79122

80-
size(c::Char) = ()
81-
size(c::Char,d) = convert(Int, d) < 1 ? throw(BoundsError()) : 1
82-
ndims(c::Char) = 0
83-
ndims(::Type{Char}) = 0
84-
length(c::Char) = 1
85-
firstindex(c::Char) = 1
86-
lastindex(c::Char) = 1
87-
getindex(c::Char) = c
88-
getindex(c::Char, i::Integer) = i == 1 ? c : throw(BoundsError())
89-
getindex(c::Char, I::Integer...) = all(x -> x == 1, I) ? c : throw(BoundsError())
90-
first(c::Char) = c
91-
last(c::Char) = c
92-
eltype(::Type{Char}) = Char
93-
94-
start(c::Char) = false
95-
next(c::Char, state) = (c, true)
96-
done(c::Char, state) = state
97-
isempty(c::Char) = false
98-
in(x::Char, y::Char) = x == y
123+
size(c::AbstractChar) = ()
124+
size(c::AbstractChar,d) = convert(Int, d) < 1 ? throw(BoundsError()) : 1
125+
ndims(c::AbstractChar) = 0
126+
ndims(::Type{<:AbstractChar}) = 0
127+
length(c::AbstractChar) = 1
128+
firstindex(c::AbstractChar) = 1
129+
lastindex(c::AbstractChar) = 1
130+
getindex(c::AbstractChar) = c
131+
getindex(c::AbstractChar, i::Integer) = i == 1 ? c : throw(BoundsError())
132+
getindex(c::AbstractChar, I::Integer...) = all(x -> x == 1, I) ? c : throw(BoundsError())
133+
first(c::AbstractChar) = c
134+
last(c::AbstractChar) = c
135+
eltype(::Type{T}) where {T<:AbstractChar} = T
136+
137+
start(c::AbstractChar) = false
138+
next(c::AbstractChar, state) = (c, true)
139+
done(c::AbstractChar, state) = state
140+
isempty(c::AbstractChar) = false
141+
in(x::AbstractChar, y::AbstractChar) = x == y
99142

100143
==(x::Char, y::Char) = reinterpret(UInt32, x) == reinterpret(UInt32, y)
101144
isless(x::Char, y::Char) = reinterpret(UInt32, x) < reinterpret(UInt32, y)
102145
hash(x::Char, h::UInt) =
103146
hash_uint64(((reinterpret(UInt32, x) + UInt64(0xd4d64234)) << 32) ⊻ UInt64(h))
104-
widen(::Type{Char}) = Char
105147

106-
-(x::Char, y::Char) = Int(x) - Int(y)
107-
-(x::Char, y::Integer) = Char(Int32(x) - Int32(y))
108-
+(x::Char, y::Integer) = Char(Int32(x) + Int32(y))
109-
+(x::Integer, y::Char) = y + x
148+
# fallbacks:
149+
isless(x::AbstractChar, y::AbstractChar) = isless(Char(x), Char(y))
150+
==(x::AbstractChar, y::AbstractChar) = Char(x) == Char(y)
151+
hash(x::AbstractChar, h::UInt) =
152+
hash_uint64(((UInt32(x) + UInt64(0xd060fad0)) << 32) ⊻ UInt64(h))
153+
widen(::Type{T}) where {T<:AbstractChar} = T
154+
155+
-(x::AbstractChar, y::AbstractChar) = Int(x) - Int(y)
156+
-(x::T, y::Integer) where {T<:AbstractChar} = T(Int32(x) - Int32(y))
157+
+(x::T, y::Integer) where {T<:AbstractChar} = T(Int32(x) + Int32(y))
158+
+(x::Integer, y::AbstractChar) = y + x
110159

111-
print(io::IO, c::Char) = (write(io, c); nothing)
160+
print(io::IO, c::AbstractChar) = (write(io, c); nothing)
112161

113162
const hex_chars = UInt8['0':'9';'a':'z']
114163

115-
function show(io::IO, c::Char)
164+
function show(io::IO, c::AbstractChar)
116165
if c <= '\\'
117166
b = c == '\0' ? 0x30 :
118167
c == '\a' ? 0x61 :
@@ -154,14 +203,14 @@ function show(io::IO, c::Char)
154203
return
155204
end
156205

157-
function show(io::IO, ::MIME"text/plain", c::Char)
206+
function show(io::IO, ::MIME"text/plain", c::T) where {T<:AbstractChar}
158207
show(io, c)
159208
if !ismalformed(c)
160209
print(io, ": ")
161210
if isoverlong(c)
162211
print(io, "[overlong] ")
163212
u = decode_overlong(c)
164-
c = Char(u)
213+
c = T(u)
165214
else
166215
u = UInt32(c)
167216
end

base/compiler/validation.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ is_valid_lvalue(x) = isa(x, Slot) || isa(x, SSAValue) || isa(x, GlobalRef)
209209
function is_valid_argument(x)
210210
if isa(x, Slot) || isa(x, SSAValue) || isa(x, GlobalRef) || isa(x, QuoteNode) ||
211211
(isa(x,Expr) && (x.head in (:static_parameter, :boundscheck, :copyast))) ||
212-
isa(x, Number) || isa(x, AbstractString) || isa(x, Char) || isa(x, Tuple) ||
212+
isa(x, Number) || isa(x, AbstractString) || isa(x, AbstractChar) || isa(x, Tuple) ||
213213
isa(x, Type) || isa(x, Core.Box) || isa(x, Module) || x === nothing
214214
return true
215215
end

base/filesystem.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ function read(f::File, ::Type{Char})
170170
end
171171
return reinterpret(Char, c)
172172
end
173+
read(f::File, ::Type{T}) where {T<:AbstractChar} = T(read(f, Char)) # fallback
173174

174175
function unsafe_read(f::File, p::Ptr{UInt8}, nel::UInt)
175176
check_open(f)

base/intfuncs.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -662,8 +662,8 @@ for sym in (:bin, :oct, :dec, :hex)
662662
@eval begin
663663
($sym)(x::Unsigned, p::Int) = ($sym)(x,p,false)
664664
($sym)(x::Unsigned) = ($sym)(x,1,false)
665-
($sym)(x::Char, p::Int) = ($sym)(UInt32(x),p,false)
666-
($sym)(x::Char) = ($sym)(UInt32(x),1,false)
665+
($sym)(x::AbstractChar, p::Int) = ($sym)(UInt32(x),p,false)
666+
($sym)(x::AbstractChar) = ($sym)(UInt32(x),1,false)
667667
($sym)(x::Integer, p::Int) = ($sym)(unsigned(abs(x)),p,x<0)
668668
($sym)(x::Integer) = ($sym)(unsigned(abs(x)),1,x<0)
669669
end

base/io.jl

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ read(io::AbstractPipe, byte::Type{UInt8}) = read(pipe_reader(io), byte)
229229
unsafe_read(io::AbstractPipe, p::Ptr{UInt8}, nb::UInt) = unsafe_read(pipe_reader(io), p, nb)
230230
read(io::AbstractPipe) = read(pipe_reader(io))
231231
readuntil(io::AbstractPipe, arg::UInt8; kw...) = readuntil(pipe_reader(io), arg; kw...)
232-
readuntil(io::AbstractPipe, arg::Char; kw...) = readuntil(pipe_reader(io), arg; kw...)
232+
readuntil(io::AbstractPipe, arg::AbstractChar; kw...) = readuntil(pipe_reader(io), arg; kw...)
233233
readuntil(io::AbstractPipe, arg::AbstractString; kw...) = readuntil(pipe_reader(io), arg; kw...)
234234
readuntil(io::AbstractPipe, arg::AbstractVector; kw...) = readuntil(pipe_reader(io), arg; kw...)
235235
readuntil_vector!(io::AbstractPipe, target::AbstractVector, keep::Bool, out) = readuntil_vector!(pipe_reader(io), target, keep, out)
@@ -303,7 +303,7 @@ read!(filename::AbstractString, a) = open(io->read!(io, a), filename)
303303
readuntil(filename::AbstractString, delim; keep::Bool = false)
304304
305305
Read a string from an I/O stream or a file, up to the given delimiter.
306-
The delimiter can be a `UInt8`, `Char`, string, or vector.
306+
The delimiter can be a `UInt8`, `AbstractChar`, string, or vector.
307307
Keyword argument `keep` controls whether the delimiter is included in the result.
308308
The text is assumed to be encoded in UTF-8.
309309
@@ -570,6 +570,7 @@ function write(io::IO, c::Char)
570570
n += 1
571571
end
572572
end
573+
write(io::IO, c::AbstractChar) = write(io, Char(c)) # fallback
573574

574575
function write(io::IO, s::Symbol)
575576
pname = unsafe_convert(Ptr{UInt8}, s)
@@ -627,12 +628,13 @@ function read(io::IO, ::Type{Char})
627628
end
628629
return reinterpret(Char, c)
629630
end
631+
read(io::IO, ::Type{T}) where {T<:AbstractChar} = T(read(io, Char)) # fallback
630632

631633
# readuntil_string is useful below since it has
632634
# an optimized method for s::IOStream
633635
readuntil_string(s::IO, delim::UInt8, keep::Bool) = String(readuntil(s, delim, keep=keep))
634636

635-
function readuntil(s::IO, delim::Char; keep::Bool=false)
637+
function readuntil(s::IO, delim::AbstractChar; keep::Bool=false)
636638
if delim ≤ '\x7f'
637639
return readuntil_string(s, delim % UInt8, keep)
638640
end
@@ -994,7 +996,7 @@ function skipchars(predicate, io::IO; linecomment=nothing)
994996
end
995997

996998
"""
997-
countlines(io::IO; eol::Char = '\\n')
999+
countlines(io::IO; eol::AbstractChar = '\\n')
9981000
9991001
Read `io` until the end of the stream/file and count the number of lines. To specify a file
10001002
pass the filename as the first argument. EOL markers other than `'\\n'` are supported by
@@ -1017,7 +1019,7 @@ julia> countlines(io, eol = '.')
10171019
1
10181020
```
10191021
"""
1020-
function countlines(io::IO; eol::Char='\n')
1022+
function countlines(io::IO; eol::AbstractChar='\n')
10211023
isascii(eol) || throw(ArgumentError("only ASCII line terminators are supported"))
10221024
aeol = UInt8(eol)
10231025
a = Vector{UInt8}(uninitialized, 8192)
@@ -1034,4 +1036,4 @@ function countlines(io::IO; eol::Char='\n')
10341036
nl
10351037
end
10361038

1037-
countlines(f::AbstractString; eol::Char = '\n') = open(io->countlines(io, eol = eol), f)::Int
1039+
countlines(f::AbstractString; eol::AbstractChar = '\n') = open(io->countlines(io, eol = eol), f)::Int

base/iterators.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ first(r::Reverse) = last(r.itr) # and the last shall be first
9090
reverse(R::AbstractRange) = Base.reverse(R) # copying ranges is cheap
9191
reverse(G::Generator) = Generator(G.f, reverse(G.iter))
9292
reverse(r::Reverse) = r.itr
93-
reverse(x::Union{Number,Char}) = x
93+
reverse(x::Union{Number,AbstractChar}) = x
9494
reverse(p::Pair) = Base.reverse(p) # copying pairs is cheap
9595

9696
start(r::Reverse{<:Tuple}) = length(r.itr)

base/parse.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ julia> parse(Complex{Float64}, "3.2e-1 + 4.5im")
3333
"""
3434
parse(T::Type, str; base = Int)
3535

36-
function parse(::Type{T}, c::Char; base::Integer = 36) where T<:Integer
36+
function parse(::Type{T}, c::AbstractChar; base::Integer = 36) where T<:Integer
3737
a::Int = (base <= 36 ? 10 : 36)
3838
2 <= base <= 62 || throw(ArgumentError("invalid base: base must be 2 ≤ base ≤ 62, got $base"))
3939
d = '0' <= c <= '9' ? c-'0' :

0 commit comments

Comments
 (0)