Skip to content

Conversation

@codeflash-ai
Copy link

@codeflash-ai codeflash-ai bot commented Oct 28, 2025

📄 86% (0.86x) speedup for _freq_to_period in gs_quant/timeseries/technicals.py

⏱️ Runtime : 16.7 milliseconds → 9.01 milliseconds (best of 38 runs)

📝 Explanation and details

Optimizations made:

  • Reduced repeated attribute access for freq (for enums, this is faster than always calling .value or using string comparison).
  • Used more direct fast-path comparisons for frequency, reducing redundant branching.
  • Used computed asfreq_val to avoid repeated logic in the business day case.
  • Exploited local variable lookups (faster than repeated attribute/dict lookup).
  • No behavioral changes; original logic flow is fully preserved.
  • No changes to exception type, message, or function signature.
  • Kept all original comments in place.

Note: Assumes Frequency is either an enum/type or string-like - this mirrors the type handling from the original implementation, and optimizations for comparison assume that value or direct string are used depending on Frequency implementation.

Correctness verification report:

Test Status
⚙️ Existing Unit Tests 🔘 None Found
🌀 Generated Regression Tests 22 Passed
⏪ Replay Tests 🔘 None Found
🔎 Concolic Coverage Tests 🔘 None Found
📊 Tests Coverage 70.0%
🌀 Generated Regression Tests and Runtime
from datetime import datetime, timedelta
from enum import Enum

import pandas as pd
# imports
import pytest
from gs_quant.timeseries.technicals import _freq_to_period


# Define Frequency Enum as used by _freq_to_period
# Four-member enum; each member's value is the same string as its name.
Frequency = Enum(
    'Frequency',
    [
        ('YEAR', 'YEAR'),
        ('QUARTER', 'QUARTER'),
        ('MONTH', 'MONTH'),
        ('WEEKLY', 'WEEKLY'),
    ],
    module=__name__,
)

# Custom error as referenced in the function
class MqValueError(ValueError):
    """Local stand-in error type; adds nothing on top of ValueError."""

# Minimal stub for statsmodels.tsa.seasonal.freq_to_period
def _freq_to_period_statsmodels_stub(freq):
    # Mapping based on pandas frequencies
    freq_map = {
        'D': 7,
        'B': 5,
        'W': 52,
        'M': 12,
        'ME': 12,
        'MS': 12,
        'QE-DEC': 4,
        'QE': 4,
        'QS': 4,
        'Q': 4,
    }
    return freq_map.get(freq, None)
from gs_quant.timeseries.technicals import _freq_to_period

# ------------------ UNIT TESTS ------------------

# Basic Test Cases

def test_daily_series_year_period():
    """Call _freq_to_period on a 10-point daily series with Frequency.YEAR.

    The return value is unpacked into two names; nothing is asserted on it.
    """
    daily_index = pd.date_range('2022-01-01', periods=10, freq='D')
    series = pd.Series(range(10), index=daily_index)
    adjusted, n_periods = _freq_to_period(series, Frequency.YEAR) # 380μs -> 388μs (2.07% slower)

def test_daily_series_quarter_period():
    """Call _freq_to_period on a 10-point daily series with Frequency.QUARTER.

    The return value is unpacked into two names; nothing is asserted on it.
    """
    series = pd.Series(
        range(10),
        index=pd.date_range('2022-01-01', periods=10, freq='D'),
    )
    adjusted, n_periods = _freq_to_period(series, Frequency.QUARTER) # 368μs -> 369μs (0.228% slower)

def test_daily_series_month_period():
    """Call _freq_to_period on a 10-point daily series with Frequency.MONTH.

    The return value is unpacked into two names; nothing is asserted on it.
    """
    daily_index = pd.date_range('2022-01-01', periods=10, freq='D')
    series = pd.Series(range(10), index=daily_index)
    adjusted, n_periods = _freq_to_period(series, Frequency.MONTH) # 366μs -> 369μs (0.753% slower)

def test_daily_series_weekly_period():
    """Call _freq_to_period on a 10-point daily series with Frequency.WEEKLY.

    The return value is unpacked into two names; nothing is asserted on it.
    """
    series = pd.Series(
        range(10),
        index=pd.date_range('2022-01-01', periods=10, freq='D'),
    )
    adjusted, n_periods = _freq_to_period(series, Frequency.WEEKLY) # 368μs -> 378μs (2.67% slower)

def test_business_day_series_year_period():
    """Call _freq_to_period on a 10-point business-day series with Frequency.YEAR.

    The return value is unpacked into two names; nothing is asserted on it.
    """
    bday_index = pd.date_range('2022-01-01', periods=10, freq='B')
    series = pd.Series(range(10), index=bday_index)
    adjusted, n_periods = _freq_to_period(series, Frequency.YEAR) # 439μs -> 521μs (15.8% slower)

def test_business_day_series_month_period():
    """Call _freq_to_period on a 10-point business-day series with Frequency.MONTH.

    The return value is unpacked into two names; nothing is asserted on it.
    """
    series = pd.Series(
        range(10),
        index=pd.date_range('2022-01-01', periods=10, freq='B'),
    )
    adjusted, n_periods = _freq_to_period(series, Frequency.MONTH) # 432μs -> 521μs (17.1% slower)

def test_business_day_series_weekly_period():
    """Call _freq_to_period on a 10-point business-day series with Frequency.WEEKLY.

    The return value is unpacked into two names; nothing is asserted on it.
    """
    bday_index = pd.date_range('2022-01-01', periods=10, freq='B')
    series = pd.Series(range(10), index=bday_index)
    adjusted, n_periods = _freq_to_period(series, Frequency.WEEKLY) # 440μs -> 523μs (15.8% slower)







def test_empty_series():
    """Call _freq_to_period on an empty series indexed by an empty DatetimeIndex.

    The test passes only if the call does not raise and its return value
    unpacks into two names; nothing further is asserted.
    """
    empty_index = pd.to_datetime([])
    series = pd.Series([], index=empty_index)
    adjusted, n_periods = _freq_to_period(series, Frequency.YEAR) # 154μs -> 157μs (1.69% slower)

def test_series_with_nans():
    """Call _freq_to_period on a 5-point daily series containing missing values.

    The original note expects the gaps to be forward-filled, but that is
    not asserted here - TODO confirm against _freq_to_period.
    """
    values = [None, 2, None, 4, 5]
    series = pd.Series(
        values,
        index=pd.date_range('2022-01-01', periods=5, freq='D'),
    )
    adjusted, n_periods = _freq_to_period(series, Frequency.YEAR) # 386μs -> 391μs (1.16% slower)



def test_series_with_irregular_freq():
    """Call _freq_to_period on three unevenly spaced dates (Jan 1, 3 and 7, 2022).

    The return value is unpacked into two names; nothing is asserted on it.
    """
    irregular_index = pd.DatetimeIndex(
        [datetime(2022, 1, day) for day in (1, 3, 7)]
    )
    series = pd.Series([1, 2, 3], index=irregular_index)
    adjusted, n_periods = _freq_to_period(series, Frequency.YEAR) # 536μs -> 537μs (0.345% slower)


def test_large_daily_series():
    """Call _freq_to_period on a 1000-point daily series with Frequency.YEAR.

    The return value is unpacked into two names; nothing is asserted on it.
    """
    size = 1000
    daily_index = pd.date_range('2022-01-01', periods=size, freq='D')
    series = pd.Series(range(size), index=daily_index)
    adjusted, n_periods = _freq_to_period(series, Frequency.YEAR) # 278μs -> 288μs (3.57% slower)

def test_large_business_day_series():
    """Call _freq_to_period on a 1000-point business-day series with Frequency.YEAR.

    The return value is unpacked into two names; nothing is asserted on it.
    """
    size = 1000
    bday_index = pd.date_range('2022-01-01', periods=size, freq='B')
    series = pd.Series(range(size), index=bday_index)
    adjusted, n_periods = _freq_to_period(series, Frequency.YEAR) # 4.48ms -> 449μs (898% faster)



#------------------------------------------------
from datetime import datetime, timedelta
from enum import Enum

import pandas as pd
# imports
import pytest
from gs_quant.timeseries.technicals import _freq_to_period


# Minimal implementation of Frequency Enum for testing
# Four-member enum for the tests; each member's value equals its name.
Frequency = Enum(
    'Frequency',
    {label: label for label in ('YEAR', 'QUARTER', 'MONTH', 'WEEKLY')},
    module=__name__,
)

# Minimal MqValueError for testing
class MqValueError(ValueError):
    """Minimal test-local error type; behaves exactly like ValueError."""

# Minimal freq_to_period function for testing
def freq_to_period(freq_str):
    """Return the hard-coded period for a frequency alias, or ``None``.

    Recognised aliases are 'D', 'B', 'W', the monthly aliases
    'M'/'MS'/'ME' and the quarterly aliases 'QS'/'QE'/'Q'. Anything else
    yields ``None``.
    """
    known = {'D': 7, 'B': 5, 'W': 52}
    # Monthly and quarterly aliases share one period each.
    known.update(dict.fromkeys(('M', 'MS', 'ME'), 12))
    known.update(dict.fromkeys(('QS', 'QE', 'Q'), 4))
    return known.get(freq_str)
from gs_quant.timeseries.technicals import _freq_to_period

# ------------------------------
# Unit Tests for _freq_to_period
# ------------------------------

# ----------- BASIC TEST CASES -----------
def test_daily_series_yearly_period():
    """Call _freq_to_period on a 365-point daily series with Frequency.YEAR.

    The return value is unpacked into two names; nothing is asserted on it.
    """
    n_days = 365
    daily_index = pd.date_range('2022-01-01', periods=n_days, freq='D')
    series = pd.Series(range(n_days), index=daily_index)
    adjusted, n_periods = _freq_to_period(series, Frequency.YEAR) # 307μs -> 310μs (0.874% slower)

def test_daily_series_monthly_period():
    """Call _freq_to_period on a 60-point daily series with Frequency.MONTH.

    The return value is unpacked into two names; nothing is asserted on it.
    """
    n_days = 60
    daily_index = pd.date_range('2022-01-01', periods=n_days, freq='D')
    series = pd.Series(range(n_days), index=daily_index)
    adjusted, n_periods = _freq_to_period(series, Frequency.MONTH) # 271μs -> 285μs (4.93% slower)

def test_business_day_series_weekly_period():
    """Call _freq_to_period on a 20-point business-day series with Frequency.WEEKLY.

    The return value is unpacked into two names; nothing is asserted on it.
    """
    series = pd.Series(
        range(20),
        index=pd.date_range('2022-01-03', periods=20, freq='B'),
    )
    adjusted, n_periods = _freq_to_period(series, Frequency.WEEKLY) # 485μs -> 533μs (9.04% slower)




def test_series_with_missing_dates():
    """Call _freq_to_period on a series observed every other day (Jan 1, 3, 5).

    The original note expects the result to be expanded to a daily index
    and forward-filled; the expected index is built below but never
    compared against the result - TODO confirm and add an assertion.
    """
    sparse_index = pd.to_datetime(['2022-01-01', '2022-01-03', '2022-01-05'])
    series = pd.Series([1, 2, 3], index=sparse_index)
    adjusted, n_periods = _freq_to_period(series, Frequency.MONTH) # 503μs -> 501μs (0.233% faster)
    # Daily index spanning the same date range; currently unused.
    full_daily_index = pd.date_range('2022-01-01', '2022-01-05', freq='D')

def test_series_with_nans():
    """Call _freq_to_period on a 10-point daily series with scattered missing values.

    The original note expects the gaps to be forward-filled, but that is
    not asserted here - TODO confirm against _freq_to_period.
    """
    values = [None, 1, None, 2, None, None, 3, None, None, 4]
    daily_index = pd.date_range('2022-01-01', periods=10, freq='D')
    series = pd.Series(values, index=daily_index)
    adjusted, n_periods = _freq_to_period(series, Frequency.MONTH) # 373μs -> 386μs (3.28% slower)



def test_business_day_series_quarterly_period():
    """Call _freq_to_period on a 60-point business-day series with Frequency.QUARTER.

    The return value is unpacked into two names; nothing is asserted on it.
    """
    n_points = 60
    bday_index = pd.date_range('2022-01-03', periods=n_points, freq='B')
    series = pd.Series(range(n_points), index=bday_index)
    adjusted, n_periods = _freq_to_period(series, Frequency.QUARTER) # 571μs -> 453μs (26.0% faster)

def test_series_with_unusual_freq():
    """Call _freq_to_period on three unevenly spaced dates (Jan 1, 4 and 10, 2022).

    The return value is unpacked into two names; nothing is asserted on it.
    """
    series = pd.Series(
        [1, 2, 3],
        index=pd.to_datetime(['2022-01-01', '2022-01-04', '2022-01-10']),
    )
    adjusted, n_periods = _freq_to_period(series, Frequency.MONTH) # 507μs -> 503μs (0.756% faster)

# ----------- LARGE SCALE TEST CASES -----------
def test_large_daily_series_yearly_period():
    """Call _freq_to_period on a 1000-point daily series with Frequency.YEAR.

    The return value is unpacked into two names; nothing is asserted on it.
    """
    size = 1000
    daily_index = pd.date_range('2020-01-01', periods=size, freq='D')
    series = pd.Series(range(size), index=daily_index)
    adjusted, n_periods = _freq_to_period(series, Frequency.YEAR) # 266μs -> 269μs (1.33% slower)

def test_large_business_day_series_monthly_period():
    """Call _freq_to_period on a 1000-point business-day series with Frequency.MONTH.

    The return value is unpacked into two names; nothing is asserted on it.
    """
    size = 1000
    bday_index = pd.date_range('2020-01-01', periods=size, freq='B')
    series = pd.Series(range(size), index=bday_index)
    adjusted, n_periods = _freq_to_period(series, Frequency.MONTH) # 4.40ms -> 452μs (874% faster)



def test_large_series_with_missing_dates_and_nans():
    """Call _freq_to_period on a large daily series with gaps in both values and dates.

    Every 10th value is None, and every 50th row is then dropped so the
    index is no longer evenly spaced. The return value is unpacked into
    two names; nothing is asserted on it.
    """
    size = 1000
    values = [None if i % 10 == 0 else i for i in range(size)]
    series = pd.Series(
        values,
        index=pd.date_range('2020-01-01', periods=size, freq='D'),
    )
    # Drop every 50th timestamp to make the index irregular.
    dropped_labels = series.index[::50]
    series = series.drop(dropped_labels)
    adjusted, n_periods = _freq_to_period(series, Frequency.MONTH) # 416μs -> 415μs (0.270% faster)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.

To edit these changes git checkout codeflash/optimize-_freq_to_period-mhb6elb7 and push.

Codeflash

**Optimizations made:**
- Reduced repeated attribute access for `freq` (for enums, this is faster than always calling `.value` or using string comparison).
- Used more direct fast-path comparisons for frequency, reducing redundant branching.
- Used computed `asfreq_val` to avoid repeated logic in the business day case.
- Exploited local variable lookups (faster than repeated attribute/dict lookup).
- No behavioral changes; original logic flow is fully preserved.
- No changes to exception type, message, or function signature.
- Kept all original comments in place.

**Note:** Assumes `Frequency` is either an enum/type or string-like - this mirrors the type handling from the original implementation, and optimizations for comparison assume that `value` or direct string are used depending on `Frequency` implementation.
@codeflash-ai codeflash-ai bot requested a review from mashraf-222 October 28, 2025 23:06
@codeflash-ai codeflash-ai bot added the ⚡️ codeflash Optimization PR opened by Codeflash AI label Oct 28, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

⚡️ codeflash Optimization PR opened by Codeflash AI

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant