Skip to content

Commit 34684b1

Browse files
committed
Remove map_partitions from the offset_by_other_columns function (use direct column assignment instead)
1 parent 20f0603 commit 34684b1

File tree

1 file changed

+9
-42
lines changed

1 file changed

+9
-42
lines changed

sed/core/dfops.py

Lines changed: 9 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -392,53 +392,20 @@ def offset_by_other_columns(
392 392
"Please open a request on GitHub if this feature is required.",
393 393
)
394 394

395-
# calculate the mean of the columns to reduce
396-
means = {
397-
col: dask.delayed(df[col].mean())
398-
for col, red, pm in zip(offset_columns, reductions, preserve_mean)
399-
if red or pm
400-
}
401-
402-
# define the functions to apply the offsets
403-
def shift_by_mean(x, cols, signs, means, flip_signs=False):
404-
"""Shift the target column by the mean of the offset columns."""
405-
for col in cols:
406-
s = -signs[col] if flip_signs else signs[col]
407-
x[target_column] = x[target_column] + s * means[col]
408-
return x[target_column]
409-
410-
def shift_by_row(x, cols, signs):
411-
"""Apply the offsets to the target column."""
412-
for col in cols:
413-
x[target_column] = x[target_column] + signs[col] * x[col]
414-
return x[target_column]
415-
416 395
# apply offset from the reduced columns
417-
df[target_column] = df.map_partitions(
418-
shift_by_mean,
419-
cols=[col for col, red in zip(offset_columns, reductions) if red],
420-
signs=signs_dict,
421-
means=means,
422-
meta=df[target_column].dtype,
423-
)
396+
for col, red in zip(offset_columns, reductions):
397+
if red == "mean":
398+
df[target_column] = df[target_column] + signs_dict[col] * df[col].mean()
424 399

425 400
# apply offset from the offset columns
426-
df[target_column] = df.map_partitions(
427-
shift_by_row,
428-
cols=[col for col, red in zip(offset_columns, reductions) if not red],
429-
signs=signs_dict,
430-
meta=df[target_column].dtype,
431-
)
401+
for col, red in zip(offset_columns, reductions):
402+
if not red:
403+
df[target_column] = df[target_column] + signs_dict[col] * df[col]
432 404

433 405
# compensate shift from the preserved mean columns
434 406
if any(preserve_mean):
435-
df[target_column] = df.map_partitions(
436-
shift_by_mean,
437-
cols=[col for col, pmean in zip(offset_columns, preserve_mean) if pmean],
438-
signs=signs_dict,
439-
means=means,
440-
flip_signs=True,
441-
meta=df[target_column].dtype,
442-
)
407+
for col, pmean in zip(offset_columns, preserve_mean):
408+
if pmean:
409+
df[target_column] = df[target_column] - signs_dict[col] * df[col].mean()
443 410

444 411
return df

0 commit comments

Comments
 (0)