@@ -293,13 +293,43 @@ def to_arr
293293
294294##
295295# A String object with a BER identifier attached.
296+ #
296297class Net ::BER ::BerIdentifiedString < String
297298 attr_accessor :ber_identifier
299+
300+ # The binary data provided when parsing the result of an LDAP search
301+ # has the encoding 'ASCII-8BIT' (which is basically 'BINARY', or 'unknown').
302+ #
303+ # This is a rough call trace showing how the binary `data` reaches
304+ # BerIdentifiedString.new(data):
305+ #
306+ # @conn.read_ber(syntax)
307+ # -> StringIO.new(self).read_ber(syntax), i.e. included from module
308+ # -> Net::BER::BERParser.read_ber(syntax)
309+ # -> (private)Net::BER::BERParser.parse_ber_object(syntax, id, data)
310+ #
311+ # In the `#parse_ber_object` method, `data` is, according to its OID,
312+ # 'cast' to one of the Net::BER::BerIdentifiedXXX classes.
313+ #
314+ # As we are using LDAP v3 we can safely assume that the data is encoded
315+ # in UTF-8 and therefore the only thing to be done when instantiating is to
316+ # switch the encoding from 'ASCII-8BIT' to 'UTF-8'.
317+ #
318+ # Unfortunately, there are some ActiveDirectory specific attributes
319+ # (like `objectguid`) that should remain binary (do they really?).
320+ # Using `#valid_encoding?` we can trap these cases. Special cases like
321+ # Japanese, Korean, etc. encodings might also profit from this. However,
322+ # I have no clue how these encodings work.
298323 def initialize args
299- super begin
300- args . respond_to? ( :encode ) ? args . encode ( 'UTF-8' ) : args
301- rescue
302- args
324+ super
325+ #
326+ # If the newly created String is tagged as binary, relabel it as 'UTF-8'
327+ # (NOTE: the bytes are NOT changed, only the encoding tag) and restore
328+ # the original encoding when the bytes are not valid UTF-8.
329+ current_encoding = encoding
330+ if current_encoding == Encoding ::BINARY
331+ force_encoding ( 'UTF-8' )
332+ force_encoding ( current_encoding ) unless valid_encoding?
303333 end
304334 end
305335end
0 commit comments