Skip to content

Conversation

@codeflash-ai
Copy link

@codeflash-ai codeflash-ai bot commented Oct 28, 2025

📄 8% (0.08x) speedup for PositionSet.clone in gs_quant/markets/position_set.py

⏱️ Runtime : 106 milliseconds → 97.8 milliseconds (best of 16 runs)

📝 Explanation and details

Optimizations applied:

  • In to_frame and from_frame, preallocate list sizes where possible to avoid resizing.
  • Replace repeated attribute lookups with local variable assignments in tight loops.
  • In clone, perform only one lookup for frame.columns.
  • In from_frame, cache column names lowercased in a set for fast lookup.
  • In loops, assign constructors to local variables (Position, PositionTag) to avoid repeated name lookup.
  • Keep argument passing/semantics and call structure identical for behavioral preservation.
  • No changes are made to business logic, type annotations, or functional behavior.
    Existing code style and comments are preserved; no unnecessary comments were added and nothing was rearranged.

Correctness verification report:

Test Status
⚙️ Existing Unit Tests 🔘 None Found
🌀 Generated Regression Tests 98 Passed
⏪ Replay Tests 🔘 None Found
🔎 Concolic Coverage Tests 🔘 None Found
📊 Tests Coverage 100.0%
🌀 Generated Regression Tests and Runtime
import datetime as dt

import pandas as pd
# imports
import pytest
from gs_quant.markets.position_set import PositionSet


# Minimal stub classes to support PositionSet and clone
class MqValueError(Exception):
    """Stub error type declared alongside the test stubs; adds nothing to Exception."""

class PositionTag:
    """Minimal test stub for a position tag: a plain ``name``/``value`` pair.

    Two tags compare equal when both ``name`` and ``value`` match. Because
    ``__eq__`` is defined without ``__hash__``, instances are unhashable.
    """

    def __init__(self, name, value):
        self.name = name
        self.value = value

    def __eq__(self, other):
        # Returning NotImplemented (rather than False) lets Python try the
        # other operand's reflected __eq__; plain `tag == "x"` is still False.
        if not isinstance(other, PositionTag):
            return NotImplemented
        return self.name == other.name and self.value == other.value

    def __repr__(self):
        # Readable output in assertion failures.
        return f"{type(self).__name__}(name={self.name!r}, value={self.value!r})"

class Position:
    """Minimal test stub for a position record.

    ``asset_id`` falls back to ``identifier`` when the given ``asset_id`` is
    falsy, and ``tags`` becomes a fresh empty list when the given ``tags`` is
    falsy. Equality compares every stored field. Because ``__eq__`` is defined
    without ``__hash__``, instances are unhashable.
    """

    def __init__(self, identifier, asset_id=None, name=None, weight=None, quantity=None, notional=None, tags=None):
        self.identifier = identifier
        self.asset_id = asset_id or identifier  # For sorting and equality
        self.name = name
        self.weight = weight
        self.quantity = quantity
        self.notional = notional
        self.tags = tags or []

    def __eq__(self, other):
        # Returning NotImplemented (rather than False) lets Python try the
        # other operand's reflected __eq__; `position == "x"` is still False.
        if not isinstance(other, Position):
            return NotImplemented
        return (
            self.identifier == other.identifier
            and self.asset_id == other.asset_id
            and self.name == other.name
            and self.weight == other.weight
            and self.quantity == other.quantity
            and self.notional == other.notional
            and self.tags == other.tags
        )

    def __repr__(self):
        # Readable output in assertion failures.
        return (
            f"{type(self).__name__}(identifier={self.identifier!r}, asset_id={self.asset_id!r}, "
            f"name={self.name!r}, weight={self.weight!r}, quantity={self.quantity!r}, "
            f"notional={self.notional!r}, tags={self.tags!r})"
        )

    def as_dict(self, tags_as_keys=False):
        """Return the position as a flat dict.

        The base keys are ``identifier``, ``id`` (the asset id), ``name``,
        ``weight``, ``quantity`` and ``notional``. When ``tags_as_keys`` is
        true, each tag is added as ``{tag.name: tag.value}``; a tag whose name
        matches an existing key overwrites that key.
        """
        d = {
            'identifier': self.identifier,
            'id': self.asset_id,
            'name': self.name,
            'weight': self.weight,
            'quantity': self.quantity,
            'notional': self.notional
        }
        if tags_as_keys and self.tags:
            for tag in self.tags:
                d[tag.name] = tag.value
        return d

# ================================
# Unit Tests for clone
# ================================

# 1. Basic Test Cases

def test_clone_simple_quantity_positions():
    """Clone a PositionSet built from two quantity-based positions."""
    holdings = [
        Position(identifier='AAPL UW', quantity=100),
        Position(identifier='MSFT UW', quantity=200),
    ]
    ps = PositionSet(holdings, date=dt.date(2024, 1, 1))
    codeflash_output = ps.clone()  # 1.02ms -> 906μs (13.1% faster)
    clone_ps = codeflash_output
    # Walks original/clone pairs (intended: equal but distinct objects);
    # no per-pair check is present in this listing.
    for _source_pos, _cloned_pos in zip(ps.positions, clone_ps.positions):
        pass

def test_clone_with_weights_and_reference_notional():
    """Clone a weight-based PositionSet that carries a reference notional."""
    weighted = [
        Position(identifier='AAPL UW', weight=0.6),
        Position(identifier='MSFT UW', weight=0.4),
    ]
    ps = PositionSet(weighted, reference_notional=1000000, date=dt.date(2023, 12, 31))
    codeflash_output = ps.clone()  # 1.00ms -> 876μs (14.4% faster)
    clone_ps = codeflash_output

def test_clone_preserves_tags():
    """Clone a set holding one tagged position (intended: tags survive the clone)."""
    sector_tag = PositionTag('Sector', 'Tech')
    tagged = Position(identifier='AAPL UW', quantity=50, tags=[sector_tag])
    ps = PositionSet([tagged])
    codeflash_output = ps.clone()  # 1.02ms -> 901μs (13.3% faster)
    clone_ps = codeflash_output

def test_clone_with_divisor():
    """Clone a set created with divisor=2.0 (intended: the divisor is preserved)."""
    only_position = Position(identifier='AAPL UW', quantity=10)
    ps = PositionSet([only_position], divisor=2.0)
    codeflash_output = ps.clone()  # 1.04ms -> 929μs (11.7% faster)
    clone_ps = codeflash_output

def test_clone_with_empty_positions():
    """Clone a PositionSet constructed from an empty position list."""
    empty_set = PositionSet([])
    codeflash_output = empty_set.clone()
    clone_ps = codeflash_output

# 2. Edge Test Cases




def test_clone_keep_reference_notional_true():
    """Call clone(keep_reference_notional=True) on a weighted set with a reference notional.

    Intended behavior per the original note: the quantity column is dropped
    and reference_notional is preserved. Nothing here asserts it.
    """
    halves = [
        Position(identifier='AAPL UW', weight=0.5),
        Position(identifier='MSFT UW', weight=0.5),
    ]
    ps = PositionSet(halves, reference_notional=1000000)
    codeflash_output = ps.clone(keep_reference_notional=True)  # 1.58ms -> 1.35ms (16.7% faster)
    clone_ps = codeflash_output


def test_clone_with_tags_and_extra_columns():
    """Clone a set loaded via from_frame whose frame has an extra 'Sector' column."""
    rows = [
        {'identifier': 'AAPL UW', 'quantity': 10, 'Sector': 'Tech'},
        {'identifier': 'MSFT UW', 'quantity': 20, 'Sector': 'Tech'},
    ]
    ps = PositionSet.from_frame(pd.DataFrame(rows))
    codeflash_output = ps.clone()  # 641μs -> 534μs (20.1% faster)
    clone_ps = codeflash_output
    # Iterates the cloned positions (intended: tags preserved); no
    # per-position check is present in this listing.
    for _cloned_pos in clone_ps.positions:
        pass

def test_clone_with_nan_identifier():
    """Clone a set loaded from a frame in which one row has a NaN identifier.

    The original note says such rows are ignored; nothing here asserts it.
    """
    rows = [
        {'identifier': 'AAPL UW', 'quantity': 10},
        {'identifier': float('nan'), 'quantity': 20},
    ]
    ps = PositionSet.from_frame(pd.DataFrame(rows))
    codeflash_output = ps.clone()  # 606μs -> 538μs (12.7% faster)
    clone_ps = codeflash_output

def test_clone_preserves_date():
    """The clone must report the same date the source set was created with."""
    date = dt.date(2022, 5, 17)
    pos1 = Position(identifier='AAPL UW', quantity=10)
    ps = PositionSet([pos1], date=date)
    codeflash_output = ps.clone(); clone_ps = codeflash_output # 977μs -> 824μs (18.6% faster)
    # The listing had no check at all, so the test could never fail on a lost date.
    # NOTE(review): relies on PositionSet exposing a `date` attribute — confirm.
    assert clone_ps.date == date

# 3. Large Scale Test Cases

def test_clone_large_position_set():
    """Clone a PositionSet holding 1000 quantity-based positions."""
    positions = [Position(identifier=f'ASSET{idx}', quantity=idx) for idx in range(1000)]
    ps = PositionSet(positions, date=dt.date(2020, 1, 1))
    codeflash_output = ps.clone()  # 12.8ms -> 11.8ms (8.22% faster)
    clone_ps = codeflash_output
    # Spot-check indices; the checks themselves are absent in this listing.
    for _spot in (0, 499, 999):
        pass

def test_clone_large_position_set_with_tags():
    """Clone 500 positions, each tagged 'Group' = 'A' (even index) or 'B' (odd index)."""
    positions = [
        Position(
            identifier=f'ASSET{idx}',
            quantity=idx,
            tags=[PositionTag('Group', 'B' if idx % 2 else 'A')],
        )
        for idx in range(500)
    ]
    ps = PositionSet(positions, date=dt.date(2021, 7, 1))
    codeflash_output = ps.clone()  # 10.2ms -> 9.80ms (4.46% faster)
    clone_ps = codeflash_output
    # Indices meant for tag spot checks; the checks are absent in this listing.
    for _spot in (0, 249, 499):
        pass

def test_clone_performance_with_large_data():
    """Clone a 999-position set built without an explicit date; no time or memory bound is asserted."""
    many = [Position(identifier=f'ASSET{idx}', quantity=idx) for idx in range(999)]
    ps = PositionSet(many)
    codeflash_output = ps.clone()  # 12.9ms -> 11.8ms (9.24% faster)
    clone_ps = codeflash_output

# Additional edge: test that clone returns a deep copy (modifying clone doesn't affect original)
def test_clone_is_deep_copy():
    """Mutating a cloned position must leave the source position untouched."""
    pos1 = Position(identifier='AAPL UW', quantity=10)
    ps = PositionSet([pos1])
    codeflash_output = ps.clone(); clone_ps = codeflash_output # 972μs -> 892μs (8.98% faster)
    # Modify clone
    clone_ps.positions[0].quantity = 20
    # The listing stopped here, so a shallow copy would have gone unnoticed.
    assert clone_ps.positions[0] is not pos1
    assert pos1.quantity == 10

# Additional: test that clone works with positions with None fields
def test_clone_with_none_fields():
    """Clone a set whose only position has quantity, weight and notional all None."""
    blank = Position(identifier='AAPL UW', quantity=None, weight=None, notional=None)
    ps = PositionSet([blank])
    codeflash_output = ps.clone()  # 885μs -> 746μs (18.6% faster)
    clone_ps = codeflash_output
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
#------------------------------------------------
import datetime as dt

import pandas as pd
# imports
import pytest
from gs_quant.markets.position_set import PositionSet


# Minimal Position and PositionTag classes for testing
class PositionTag:
    """Minimal test stub for a tag attached to a position: ``name`` plus ``value``.

    Tags are equal when both fields match. Because ``__eq__`` is defined
    without ``__hash__``, instances are unhashable.
    """

    def __init__(self, name, value):
        self.name = name
        self.value = value

    def __eq__(self, other):
        # Defer to the other operand for foreign types instead of answering
        # False on its behalf; `tag == 123` still evaluates to False.
        if not isinstance(other, PositionTag):
            return NotImplemented
        return self.name == other.name and self.value == other.value

    def __repr__(self):
        # Readable output in assertion failures.
        return f"{type(self).__name__}(name={self.name!r}, value={self.value!r})"

class Position:
    """Minimal test stub for a position record.

    ``asset_id`` defaults to ``identifier`` when the given ``asset_id`` is
    falsy; ``tags`` defaults to a fresh empty list when the given ``tags`` is
    falsy. Equality is field-by-field. Because ``__eq__`` is defined without
    ``__hash__``, instances are unhashable.
    """

    def __init__(self, identifier, asset_id=None, name=None, weight=None, quantity=None, notional=None, tags=None):
        self.identifier = identifier
        self.asset_id = asset_id or identifier
        self.name = name
        self.weight = weight
        self.quantity = quantity
        self.notional = notional
        self.tags = tags or []

    def as_dict(self, tags_as_keys=False):
        """Return the position as a flat dict.

        The base keys are ``identifier``, ``id`` (the asset id), ``name``,
        ``weight``, ``quantity`` and ``notional``. When ``tags_as_keys`` is
        true, each tag is added as ``{tag.name: tag.value}``; a tag whose name
        matches an existing key overwrites that key.
        """
        d = {
            'identifier': self.identifier,
            'id': self.asset_id,
            'name': self.name,
            'weight': self.weight,
            'quantity': self.quantity,
            'notional': self.notional
        }
        if tags_as_keys and self.tags:
            for tag in self.tags:
                d[tag.name] = tag.value
        return d

    def __eq__(self, other):
        # NotImplemented (not False) lets Python consult the other operand's
        # reflected __eq__; `position == "x"` still evaluates to False.
        if not isinstance(other, Position):
            return NotImplemented
        return (self.identifier == other.identifier and
                self.asset_id == other.asset_id and
                self.name == other.name and
                self.weight == other.weight and
                self.quantity == other.quantity and
                self.notional == other.notional and
                self.tags == other.tags)

    def __repr__(self):
        # Readable output in assertion failures.
        return (
            f"{type(self).__name__}(identifier={self.identifier!r}, asset_id={self.asset_id!r}, "
            f"name={self.name!r}, weight={self.weight!r}, quantity={self.quantity!r}, "
            f"notional={self.notional!r}, tags={self.tags!r})"
        )

# ---------------------------
# Unit tests for clone method
# ---------------------------

# 1. Basic Test Cases

def test_clone_basic_quantity():
    """Clone a dated set of two positions that only carry a quantity."""
    members = [
        Position('AAPL UW', quantity=100),
        Position('MSFT UW', quantity=200),
    ]
    ps = PositionSet(members, date=dt.date(2022, 1, 1))
    codeflash_output = ps.clone()  # 1.02ms -> 901μs (13.5% faster)
    clone_ps = codeflash_output

def test_clone_basic_weight():
    """Clone a dated set of two positions that only carry a weight."""
    members = [
        Position('AAPL UW', weight=0.6),
        Position('MSFT UW', weight=0.4),
    ]
    ps = PositionSet(members, date=dt.date(2022, 1, 2))
    codeflash_output = ps.clone()  # 1.01ms -> 866μs (16.7% faster)
    clone_ps = codeflash_output

def test_clone_basic_with_tags():
    """Clone a set whose single position carries a 'Sector' tag and a 'Region' tag."""
    sector = PositionTag('Sector', 'Tech')
    region = PositionTag('Region', 'US')
    tagged = Position('AAPL UW', quantity=50, tags=[sector, region])
    ps = PositionSet([tagged], date=dt.date(2022, 1, 3))
    codeflash_output = ps.clone()  # 1.04ms -> 927μs (11.7% faster)
    clone_ps = codeflash_output

def test_clone_basic_with_divisor():
    """Clone a dated single-position set created with divisor=2.5."""
    only_position = Position('AAPL UW', quantity=10)
    ps = PositionSet([only_position], date=dt.date(2022, 1, 4), divisor=2.5)
    codeflash_output = ps.clone()  # 1.05ms -> 921μs (13.7% faster)
    clone_ps = codeflash_output

def test_clone_basic_with_reference_notional_and_keep_flag():
    """Call clone(keep_reference_notional=True) on two equal-weight positions with a reference notional."""
    halves = [
        Position('AAPL UW', weight=0.5),
        Position('MSFT UW', weight=0.5),
    ]
    ps = PositionSet(halves, date=dt.date(2022, 1, 5), reference_notional=1000.0)
    codeflash_output = ps.clone(keep_reference_notional=True)  # 1.29ms -> 1.12ms (15.1% faster)
    clone_ps = codeflash_output


def test_clone_empty_positions():
    """Clone a dated PositionSet that holds no positions."""
    empty_set = PositionSet([], date=dt.date(2022, 1, 7))
    codeflash_output = empty_set.clone()
    clone_ps = codeflash_output

def test_clone_single_position():
    """Clone a dated PositionSet that holds exactly one position."""
    lone = Position('AAPL UW', quantity=1)
    ps = PositionSet([lone], date=dt.date(2022, 1, 8))
    codeflash_output = ps.clone()  # 1.24ms -> 1.14ms (9.04% faster)
    clone_ps = codeflash_output

def test_clone_positions_with_none_fields():
    """Clone a set whose positions leave every optional field as None, implicitly or explicitly."""
    implicit_none = Position('AAPL UW')
    explicit_none = Position('MSFT UW', quantity=None, weight=None, notional=None)
    ps = PositionSet([implicit_none, explicit_none], date=dt.date(2022, 1, 9))
    codeflash_output = ps.clone()  # 934μs -> 832μs (12.2% faster)
    clone_ps = codeflash_output

def test_clone_positions_with_tags_and_missing_tag_value():
    """Clone a set whose position carries a tag with value None."""
    valueless_tag = PositionTag('Sector', None)
    tagged = Position('AAPL UW', quantity=100, tags=[valueless_tag])
    ps = PositionSet([tagged], date=dt.date(2022, 1, 10))
    codeflash_output = ps.clone()  # 1.04ms -> 899μs (15.7% faster)
    clone_ps = codeflash_output

def test_clone_with_unusual_identifiers():
    """Clone a set containing an empty-string identifier and a None identifier."""
    empty_id = Position('', quantity=1)
    missing_id = Position(None, quantity=2)
    ps = PositionSet([empty_id, missing_id], date=dt.date(2022, 1, 11))
    codeflash_output = ps.clone()  # 1.09ms -> 973μs (12.3% faster)
    clone_ps = codeflash_output

def test_clone_with_duplicate_positions():
    """Clone a set in which the very same Position object is listed twice."""
    repeated = Position('AAPL UW', quantity=100)
    ps = PositionSet([repeated] * 2, date=dt.date(2022, 1, 12))
    codeflash_output = ps.clone()  # 981μs -> 877μs (11.8% faster)
    clone_ps = codeflash_output

def test_clone_with_reference_notional_and_invalid_positions():
    """Expect ValueError when a reference notional is combined with a position carrying a notional.

    Only construction is exercised; clone() is never reached.
    """
    offending = Position('AAPL UW', weight=0.5, notional=100)
    with pytest.raises(ValueError):
        PositionSet([offending], date=dt.date(2022, 1, 13), reference_notional=1000.0)


def test_clone_with_reference_notional_and_quantity():
    """Expect ValueError when a reference notional is combined with a position carrying a quantity.

    Only construction is exercised; clone() is never reached.
    """
    offending = Position('AAPL UW', weight=0.5, quantity=10)
    with pytest.raises(ValueError):
        PositionSet([offending], date=dt.date(2022, 1, 15), reference_notional=1000.0)

def test_clone_with_various_date_types():
    """Clone two sets sharing one position: one dated with a date, one with a datetime."""
    shared = Position('AAPL UW', quantity=10)

    ps = PositionSet([shared], date=dt.date(2022, 2, 1))
    codeflash_output = ps.clone()  # 1.02ms -> 901μs (12.8% faster)
    clone_ps = codeflash_output

    ps2 = PositionSet([shared], date=dt.datetime(2022, 2, 1))
    codeflash_output = ps2.clone()  # 851μs -> 727μs (17.2% faster)
    clone_ps2 = codeflash_output

# 3. Large Scale Test Cases

def test_clone_large_number_of_positions():
    """Clone a dated set of 1000 quantity-based positions."""
    many = [Position(f'ID{idx}', quantity=idx) for idx in range(1000)]
    ps = PositionSet(many, date=dt.date(2022, 3, 1))
    codeflash_output = ps.clone()  # 12.7ms -> 11.9ms (6.41% faster)
    clone_ps = codeflash_output

def test_clone_large_number_of_tags():
    """Clone 100 positions that all share one list of 10 tags."""
    shared_tags = [PositionTag(f'Tag{k}', f'Value{k}') for k in range(10)]
    positions = [Position(f'ID{idx}', quantity=idx, tags=shared_tags) for idx in range(100)]
    ps = PositionSet(positions, date=dt.date(2022, 3, 2))
    codeflash_output = ps.clone()  # 9.05ms -> 8.85ms (2.27% faster)
    clone_ps = codeflash_output
    # Iterates the cloned positions; no per-position check is present.
    for _cloned_pos in clone_ps.positions:
        pass

def test_clone_large_weights_and_reference_notional():
    """Call clone(keep_reference_notional=True) on 500 equal-weight positions with a reference notional."""
    equal_weighted = [Position(f'ID{idx}', weight=1/500) for idx in range(500)]
    ps = PositionSet(equal_weighted, date=dt.date(2022, 3, 3), reference_notional=50000.0)
    codeflash_output = ps.clone(keep_reference_notional=True)  # 7.12ms -> 6.58ms (8.28% faster)
    clone_ps = codeflash_output

def test_clone_performance():
    """Time clone() on a 999-position set; the elapsed time is measured but not asserted."""
    import time

    many = [Position(f'ID{idx}', quantity=idx) for idx in range(999)]
    ps = PositionSet(many, date=dt.date(2022, 3, 4))
    began = time.time()
    codeflash_output = ps.clone()  # 12.6ms -> 11.8ms (6.44% faster)
    clone_ps = codeflash_output
    duration = time.time() - began

def test_clone_large_with_tags_and_reference_notional():
    """Call clone(keep_reference_notional=True) on 100 weighted positions sharing 5 tags."""
    shared_tags = [PositionTag(f'Tag{k}', f'Value{k}') for k in range(5)]
    weighted = [Position(f'ID{idx}', weight=1/100, tags=shared_tags) for idx in range(100)]
    ps = PositionSet(weighted, date=dt.date(2022, 3, 5), reference_notional=100000.0)
    codeflash_output = ps.clone(keep_reference_notional=True)  # 5.94ms -> 5.63ms (5.47% faster)
    clone_ps = codeflash_output
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.

To edit these changes, run `git checkout codeflash/optimize-PositionSet.clone-mhao6mab` and push.

Codeflash

**Optimizations applied:**
- In `to_frame` and `from_frame`, preallocate list sizes where possible to avoid resizing.
- Replace repeated attribute lookups with local variable assignments in tight loops.
- In `clone`, perform only one lookup for `frame.columns`.
- In `from_frame`, cache column names lowercased in a set for fast lookup.
- In loops, assign constructors to local variables (`Position`, `PositionTag`) to avoid repeated name lookup.
- Keep argument passing/semantics and call structure identical for behavioral preservation.
- No changes are made to business logic, type annotations, or functional behavior.  
**Existing code style and comments are preserved; no unnecessary comments were added and nothing was rearranged.**
@codeflash-ai codeflash-ai bot requested a review from mashraf-222 October 28, 2025 14:36
@codeflash-ai codeflash-ai bot added the ⚡️ codeflash Optimization PR opened by Codeflash AI label Oct 28, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

⚡️ codeflash Optimization PR opened by Codeflash AI

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant