@@ -293,13 +293,43 @@ def to_arr
293293
294294## 
295295# A String object with a BER identifier attached. 
296+ #  
296297class  Net ::BER ::BerIdentifiedString  < String 
297298  attr_accessor  :ber_identifier 
299+ 
300+   # The binary data provided when parsing the result of the LDAP search 
301+   # has the encoding 'ASCII-8BIT' (which is basically 'BINARY', or 'unknown'). 
302+   #  
303+   # This is a rough call trace showing how the binary `data` reaches 
304+   # BerIdentifiedString.new(data): 
305+   # 
306+   #  @conn.read_ber(syntax) 
307+   #     -> StringIO.new(self).read_ber(syntax), i.e. included from module 
308+   #     -> Net::BER::BERParser.read_ber(syntax)  
309+   #        -> (private)Net::BER::BERParser.parse_ber_object(syntax, id, data) 
310+   #  
311+   # In the `#parse_ber_object` method `data`, according to its OID, is 
312+   # 'cast' to one of the Net::BER::BerIdentifiedXXX classes. 
313+   #  
314+   # As we are using LDAP v3 we can safely assume that the data is encoded 
315+   # in UTF-8 and therefore the only thing to be done when instantiating is to 
316+   # switch the encoding from 'ASCII-8BIT' to 'UTF-8'. 
317+   # 
318+   # Unfortunately, there are some ActiveDirectory specific attributes 
319+   # (like `objectguid`) that should remain binary (do they really?). 
320+   # Using `#valid_encoding?` we can trap these cases. Special cases like 
321+   # Japanese, Korean, etc. encodings might also profit from this. However 
322+   # I have no clue how these encodings work. 
298323  def  initialize  args 
299-     super  begin 
300-       args . respond_to? ( :encode )  ? args . encode ( 'UTF-8' )  : args 
301-     rescue 
302-       args 
324+     super 
325+     # The bare `super` above forwards `args` unchanged to String#initialize. 
326+     # If the resulting String is tagged as binary, relabel it as 'UTF-8' 
327+     # (NOTE: we do NOT change the bytes, only the encoding tag); if the 
328+     # bytes are not valid UTF-8, the original encoding is restored. 
329+     current_encoding  =  encoding 
330+     if  current_encoding  == Encoding ::BINARY 
331+       force_encoding ( 'UTF-8' ) 
332+       force_encoding ( current_encoding )  unless  valid_encoding? 
303333    end 
304334  end 
305335end 
0 commit comments