From 43366501934c70c034849cfeef8e56aee9b8276e Mon Sep 17 00:00:00 2001 From: Aseem Saxena Date: Tue, 20 May 2025 14:03:18 -0400 Subject: [PATCH 1/2] Update code_utils.py --- codeflash/code_utils/code_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeflash/code_utils/code_utils.py b/codeflash/code_utils/code_utils.py index 13a844015..459ec216f 100644 --- a/codeflash/code_utils/code_utils.py +++ b/codeflash/code_utils/code_utils.py @@ -13,7 +13,7 @@ def encoded_tokens_len(s: str) -> int: '''Function for returning the approximate length of the encoded tokens It's an approximation of BPE encoding (https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf)''' - return len(s)//2 + return int(len(s)*0.3) def get_qualified_name(module_name: str, full_qualified_name: str) -> str: if not full_qualified_name: From c4a24e83e6b1beeab73bec61e5414ae5044a2779 Mon Sep 17 00:00:00 2001 From: Saurabh Misra Date: Tue, 20 May 2025 11:21:31 -0700 Subject: [PATCH 2/2] Update codeflash/code_utils/code_utils.py --- codeflash/code_utils/code_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeflash/code_utils/code_utils.py b/codeflash/code_utils/code_utils.py index 459ec216f..507e79f74 100644 --- a/codeflash/code_utils/code_utils.py +++ b/codeflash/code_utils/code_utils.py @@ -13,7 +13,7 @@ def encoded_tokens_len(s: str) -> int: '''Function for returning the approximate length of the encoded tokens It's an approximation of BPE encoding (https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf)''' - return int(len(s)*0.3) + return int(len(s)*0.25) def get_qualified_name(module_name: str, full_qualified_name: str) -> str: if not full_qualified_name: