def encoded_tokens_len(s: str) -> int:
    """Return the approximate number of encoded tokens in ``s``.

    This is a rough approximation of BPE encoding
    (https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf)
    that counts one token for every four characters, rounded down.

    Args:
        s: The text whose token count is being estimated.

    Returns:
        ``len(s) // 4``, which equals ``int(len(s) * 0.25)`` for every
        possible string length.
    """
    # Floor division by 4 reproduces the 0.25 tokens-per-character ratio
    # exactly (a length is never negative), so no float round-trip is needed.
    return len(s) // 4