77
88import jedi
99import libcst as cst
10- import tiktoken
1110from jedi .api .classes import Name
1211from libcst import CSTNode
1312
1413from codeflash .cli_cmds .console import logger
1514from codeflash .code_utils .code_extractor import add_needed_imports_from_module , find_preexisting_objects
16- from codeflash .code_utils .code_utils import get_qualified_name , path_belongs_to_site_packages
15+ from codeflash .code_utils .code_utils import get_qualified_name , path_belongs_to_site_packages , encoded_tokens_len
1716from codeflash .context .unused_definition_remover import remove_unused_definitions_by_function_names
1817from codeflash .discovery .functions_to_optimize import FunctionToOptimize
1918from codeflash .models .models import (
@@ -73,8 +72,7 @@ def get_code_optimization_context(
7372 )
7473
7574 # Handle token limits
76- tokenizer = tiktoken .encoding_for_model ("gpt-4o" )
77- final_read_writable_tokens = len (tokenizer .encode (final_read_writable_code ))
75+ final_read_writable_tokens = encoded_tokens_len (final_read_writable_code )
7876 if final_read_writable_tokens > optim_token_limit :
7977 raise ValueError ("Read-writable code has exceeded token limit, cannot proceed" )
8078
@@ -87,7 +85,7 @@ def get_code_optimization_context(
8785 )
8886 read_only_context_code = read_only_code_markdown .markdown
8987
90- read_only_code_markdown_tokens = len ( tokenizer . encode ( read_only_context_code ) )
88+ read_only_code_markdown_tokens = encoded_tokens_len ( read_only_context_code )
9189 total_tokens = final_read_writable_tokens + read_only_code_markdown_tokens
9290 if total_tokens > optim_token_limit :
9391 logger .debug ("Code context has exceeded token limit, removing docstrings from read-only code" )
@@ -96,7 +94,7 @@ def get_code_optimization_context(
9694 helpers_of_fto_dict , helpers_of_helpers_dict , project_root_path , remove_docstrings = True
9795 )
9896 read_only_context_code = read_only_code_no_docstring_markdown .markdown
99- read_only_code_no_docstring_markdown_tokens = len ( tokenizer . encode ( read_only_context_code ) )
97+ read_only_code_no_docstring_markdown_tokens = encoded_tokens_len ( read_only_context_code )
10098 total_tokens = final_read_writable_tokens + read_only_code_no_docstring_markdown_tokens
10199 if total_tokens > optim_token_limit :
102100 logger .debug ("Code context has exceeded token limit, removing read-only code" )
@@ -111,7 +109,7 @@ def get_code_optimization_context(
111109 code_context_type = CodeContextType .TESTGEN ,
112110 )
113111 testgen_context_code = testgen_code_markdown .code
114- testgen_context_code_tokens = len ( tokenizer . encode ( testgen_context_code ) )
112+ testgen_context_code_tokens = encoded_tokens_len ( testgen_context_code )
115113 if testgen_context_code_tokens > testgen_token_limit :
116114 testgen_code_markdown = extract_code_string_context_from_files (
117115 helpers_of_fto_dict ,
@@ -121,7 +119,7 @@ def get_code_optimization_context(
121119 code_context_type = CodeContextType .TESTGEN ,
122120 )
123121 testgen_context_code = testgen_code_markdown .code
124- testgen_context_code_tokens = len ( tokenizer . encode ( testgen_context_code ) )
122+ testgen_context_code_tokens = encoded_tokens_len ( testgen_context_code )
125123 if testgen_context_code_tokens > testgen_token_limit :
126124 raise ValueError ("Testgen code context has exceeded token limit, cannot proceed" )
127125
0 commit comments