@@ -82,18 +82,10 @@ def __init__(self, values, index, level=-1, value_columns=None):
8282
8383 self .level = self .index ._get_level_number (level )
8484
85- levels = index .levels
86- labels = index .labels
87-
88- def _make_index (lev , lab ):
89- values = _make_index_array_level (lev .values , lab )
90- i = lev ._simple_new (values , lev .name ,
91- freq = getattr (lev , 'freq' , None ),
92- tz = getattr (lev , 'tz' , None ))
93- return i
94-
95- self .new_index_levels = [_make_index (lev , lab )
96- for lev , lab in zip (levels , labels )]
85+ # when index includes `nan`, need to lift levels/strides by 1
86+ self .lift = 1 if - 1 in self .index .labels [self .level ] else 0
87+
88+ self .new_index_levels = list (index .levels )
9789 self .new_index_names = list (index .names )
9890
9991 self .removed_name = self .new_index_names .pop (self .level )
@@ -134,10 +126,10 @@ def _make_selectors(self):
134126 ngroups = len (obs_ids )
135127
136128 comp_index = _ensure_platform_int (comp_index )
137- stride = self .index .levshape [self .level ]
129+ stride = self .index .levshape [self .level ] + self . lift
138130 self .full_shape = ngroups , stride
139131
140- selector = self .sorted_labels [- 1 ] + stride * comp_index
132+ selector = self .sorted_labels [- 1 ] + stride * comp_index + self . lift
141133 mask = np .zeros (np .prod (self .full_shape ), dtype = bool )
142134 mask .put (selector , True )
143135
@@ -166,20 +158,6 @@ def get_result(self):
166158 values = com .take_nd (values , inds , axis = 1 )
167159 columns = columns [inds ]
168160
169- # we might have a missing index
170- if len (index ) != values .shape [0 ]:
171- mask = isnull (index )
172- if mask .any ():
173- l = np .arange (len (index ))
174- values , orig_values = (np .empty ((len (index ), values .shape [1 ])),
175- values )
176- values .fill (np .nan )
177- values_indexer = com ._ensure_int64 (l [~ mask ])
178- for i , j in enumerate (values_indexer ):
179- values [j ] = orig_values [i ]
180- else :
181- index = index .take (self .unique_groups )
182-
183161 # may need to coerce categoricals here
184162 if self .is_categorical is not None :
185163 values = [ Categorical .from_array (values [:,i ],
@@ -220,9 +198,16 @@ def get_new_values(self):
220198
221199 def get_new_columns (self ):
222200 if self .value_columns is None :
223- return self .removed_level
201+ if self .lift == 0 :
202+ return self .removed_level
203+
204+ lev = self .removed_level
205+ vals = np .insert (lev .astype ('object' ), 0 ,
206+ _get_na_value (lev .dtype .type ))
207+
208+ return lev ._shallow_copy (vals )
224209
225- stride = len (self .removed_level )
210+ stride = len (self .removed_level ) + self . lift
226211 width = len (self .value_columns )
227212 propagator = np .repeat (np .arange (width ), stride )
228213 if isinstance (self .value_columns , MultiIndex ):
@@ -231,59 +216,34 @@ def get_new_columns(self):
231216
232217 new_labels = [lab .take (propagator )
233218 for lab in self .value_columns .labels ]
234- new_labels .append (np .tile (np .arange (stride ), width ))
235219 else :
236220 new_levels = [self .value_columns , self .removed_level ]
237221 new_names = [self .value_columns .name , self .removed_name ]
222+ new_labels = [propagator ]
238223
239- new_labels = []
240-
241- new_labels .append (propagator )
242- new_labels .append (np .tile (np .arange (stride ), width ))
243-
224+ new_labels .append (np .tile (np .arange (stride ) - self .lift , width ))
244225 return MultiIndex (levels = new_levels , labels = new_labels ,
245226 names = new_names , verify_integrity = False )
246227
247228 def get_new_index (self ):
248- result_labels = []
249- for cur in self .sorted_labels [:- 1 ]:
250- labels = cur .take (self .compressor )
251- labels = _make_index_array_level (labels , cur )
252- result_labels .append (labels )
229+ result_labels = [lab .take (self .compressor )
230+ for lab in self .sorted_labels [:- 1 ]]
253231
254232 # construct the new index
255233 if len (self .new_index_levels ) == 1 :
256- new_index = self .new_index_levels [0 ]
257- new_index .name = self .new_index_names [0 ]
258- else :
259- new_index = MultiIndex (levels = self .new_index_levels ,
260- labels = result_labels ,
261- names = self .new_index_names ,
262- verify_integrity = False )
263-
264- return new_index
234+ lev , lab = self .new_index_levels [0 ], result_labels [0 ]
235+ if not (lab == - 1 ).any ():
236+ return lev .take (lab )
265237
238+ vals = np .insert (lev .astype ('object' ), len (lev ),
239+ _get_na_value (lev .dtype .type )).take (lab )
266240
267- def _make_index_array_level (lev , lab ):
268- """ create the combined index array, preserving nans, return an array """
269- mask = lab == - 1
270- if not mask .any ():
271- return lev
272-
273- l = np .arange (len (lab ))
274- mask_labels = np .empty (len (mask [mask ]), dtype = object )
275- mask_labels .fill (_get_na_value (lev .dtype .type ))
276- mask_indexer = com ._ensure_int64 (l [mask ])
277-
278- labels = lev
279- labels_indexer = com ._ensure_int64 (l [~ mask ])
280-
281- new_labels = np .empty (tuple ([len (lab )]), dtype = object )
282- new_labels [labels_indexer ] = labels
283- new_labels [mask_indexer ] = mask_labels
284-
285- return new_labels
241+ return lev ._shallow_copy (vals )
286242
243+ return MultiIndex (levels = self .new_index_levels ,
244+ labels = result_labels ,
245+ names = self .new_index_names ,
246+ verify_integrity = False )
287247
288248def _unstack_multiple (data , clocs ):
289249 if len (clocs ) == 0 :
@@ -483,29 +443,10 @@ def _unstack_frame(obj, level):
483443
484444
485445def get_compressed_ids (labels , sizes ):
486- # no overflow
487- if com ._long_prod (sizes ) < 2 ** 63 :
488- group_index = get_group_index (labels , sizes )
489- comp_index , obs_ids = _compress_group_index (group_index )
490- else :
491- n = len (labels [0 ])
492- mask = np .zeros (n , dtype = bool )
493- for v in labels :
494- mask |= v < 0
495-
496- while com ._long_prod (sizes ) >= 2 ** 63 :
497- i = len (sizes )
498- while com ._long_prod (sizes [:i ]) >= 2 ** 63 :
499- i -= 1
500-
501- rem_index , rem_ids = get_compressed_ids (labels [:i ],
502- sizes [:i ])
503- sizes = [len (rem_ids )] + sizes [i :]
504- labels = [rem_index ] + labels [i :]
505-
506- return get_compressed_ids (labels , sizes )
446+ from pandas .core .groupby import get_flat_ids
507447
508- return comp_index , obs_ids
448+ ids = get_flat_ids (labels , sizes , True )
449+ return _compress_group_index (ids , sort = True )
509450
510451
511452def stack (frame , level = - 1 , dropna = True ):
0 commit comments