fix(hf-integration): handle logits as tuple in hf_adapter, convert torch.Tensor to list in hf_tokenizer.decode for decoding compatibility

This commit is contained in:
Sergey Penkovsky
2025-10-05 20:47:36 +03:00
parent 3843e64098
commit c31eed8551
2 changed files with 23 additions and 5 deletions

View File

@@ -56,10 +56,24 @@ class HFTokenizerAdapter:
add_special_tokens = kwargs.get('add_special_tokens', True)
# Encode the text into token ids
input_ids = self.llm_tokenizer.encode(
text,
add_special_tokens=add_special_tokens
)
#input_ids = self.llm_tokenizer.encode(
# text,
# add_special_tokens=add_special_tokens
#)
if isinstance(text, str):
input_ids = self.llm_tokenizer.encode(
text,
add_special_tokens=add_special_tokens
)
input_ids = [input_ids] # <-- wrap the single sequence in a batch of one
else:
# A list of strings: batch mode — encode each text separately
input_ids = [
self.llm_tokenizer.encode(
t,
add_special_tokens=add_special_tokens
) for t in text
]
# Apply truncation — NOTE(review): after the batching change above, input_ids is a
# list of sequences, so len(input_ids) is the BATCH SIZE, not a sequence length;
# this check likely needs to truncate each inner sequence instead — confirm.
if truncation and max_length is not None and len(input_ids) > max_length: