
Commit f9c01c2

Merge pull request #237 from codelion/fix-package-install
Fix package install
2 parents 12ac786 + 8f09637

File tree

4 files changed: 11 additions, 5 deletions


MANIFEST.in

Lines changed: 1 addition & 0 deletions
@@ -1,3 +1,4 @@
+include optillm.py
 include optillm/plugins/*.py
 include optillm/cepo/*.py
 include optillm/cepo/configs/*.yaml
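
MANIFEST.in governs which files land in the source distribution, so adding include optillm.py ships the root module with the sdist; its absence is presumably the broken install the PR title refers to. A quick way to confirm the fix is to inspect a freshly built archive. The sketch below assumes a build already produced dist/optillm-0.2.1.tar.gz (the path is hypothetical):

# Sanity-check the sdist contents; the archive path is an assumption.
import tarfile

with tarfile.open("dist/optillm-0.2.1.tar.gz") as sdist:
    members = sdist.getnames()

# The root module should now ship alongside the package sources.
assert any(name.endswith("/optillm.py") for name in members), "optillm.py missing from sdist"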

optillm/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 import os
 
 # Version information
-__version__ = "0.2.0"
+__version__ = "0.2.1"
 
 # Get the path to the root optillm.py
 spec = util.spec_from_file_location(
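
Besides the version bump to 0.2.1, the surrounding context shows that optillm/__init__.py loads the root optillm.py through importlib at import time. The actual call is truncated in this diff, so the snippet below is only an illustrative reconstruction of that pattern; the module name and path are placeholders, not the project's real values:

# Illustrative reconstruction of the truncated importlib pattern;
# the module name and path here are placeholders.
import os
from importlib import util

root_path = os.path.join(os.path.dirname(__file__), "..", "optillm.py")  # assumed location
spec = util.spec_from_file_location("optillm_root", root_path)
module = util.module_from_spec(spec)
spec.loader.exec_module(module)

If optillm.py never makes it into the installed tree, a loader like this fails on import, which is consistent with the packaging fixes elsewhere in this commit.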

optillm/plugins/longcepo/mapreduce.py

Lines changed: 4 additions & 2 deletions
@@ -158,8 +158,8 @@ def fetch_map_response(client, model, chunk, query, system_prompt, summary):
         tokenizer,
         cb_log,
         longcepo_config,
+        irrelevance_tags,
     )
-    result = remove_chunks(result, irrelevance_tags)
     if not result:
         return "No information", cb_log
 
@@ -200,6 +200,7 @@ def collapse_chunks(
     tokenizer,
     cb_log: CBLog,
     longcepo_config: LongCepoConfig,
+    irrelevance_tags: Tuple[str] = ("[NO INFORMATION]",),
 ) -> Tuple[List[str], CBLog]:
     """
     Collapses context chunk pairs in sliding window until the total token count fits within the context window.
@@ -221,7 +222,7 @@
     num_tokens = get_prompt_length(format_chunk_list(context_chunks), tokenizer)
     token_budget = (
         longcepo_config.max_context_window
-        - get_prompt_length(longcepo_config.collapse_prompt, tokenizer)
+        - get_prompt_length(longcepo_config.reduce_prompt, tokenizer)
        - longcepo_config.max_output_tokens
     )
     logger.info(f"Pre-collapse length of chunks {num_tokens}, allowed {token_budget}")
@@ -269,6 +270,7 @@ def fetch_collapse_response(client, model, docs, query, system_prompt):
         system_prompt,
         cb_log,
     )
+    context_chunks = remove_chunks(context_chunks, irrelevance_tags)
     merge_pair_idx = (merge_pair_idx + 1) % max(len(context_chunks) - 1, 1)
     num_tokens = get_prompt_length(format_chunk_list(context_chunks), tokenizer)
     collapse_step += 1
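
Taken together, these hunks move irrelevance filtering inside collapse_chunks: callers pass irrelevance_tags down (defaulting to ("[NO INFORMATION]",)), and chunks are pruned on every collapse step instead of once after the call returns. remove_chunks itself is not part of this diff; a plausible sketch of what it does, assuming it drops chunks that consist only of an irrelevance marker:

from typing import List, Tuple

def remove_chunks(chunks: List[str], irrelevance_tags: Tuple[str, ...]) -> List[str]:
    # Hypothetical reconstruction: keep only chunks that are not bare irrelevance markers.
    return [chunk for chunk in chunks if chunk.strip() not in irrelevance_tags]

# Only the informative chunk survives:
print(remove_chunks(["[NO INFORMATION]", "Paris hosted the 2024 Olympics."], ("[NO INFORMATION]",)))

One aside on the new signature: Tuple[str] technically annotates a one-element tuple; the variadic form that the default value suggests would be Tuple[str, ...].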

pyproject.toml

Lines changed: 5 additions & 2 deletions
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "optillm"
-version = "0.2.0"
+version = "0.2.1"
 description = "An optimizing inference proxy for LLMs."
 readme = "README.md"
 license = "Apache-2.0"
@@ -81,4 +81,7 @@ optillm = [
     "plugins/*.py",
     "cepo/*.py",
     "cepo/configs/*.yaml",
-]
+]
+
+[tool.setuptools.data-files]
+"" = ["optillm.py"]

0 commit comments
