From c31eed855116605d3bbcaff76cec33b5bf41d5d9 Mon Sep 17 00:00:00 2001
From: Sergey Penkovsky
Date: Sun, 5 Oct 2025 20:47:36 +0300
Subject: [PATCH] fix(hf-integration): handle logits as tuple in hf_adapter,
 convert torch.Tensor to list in hf_tokenizer.decode for decoding
 compatibility

---
 hf-proxy/src/hf_proxy/hf_adapter.py   |  6 +++++-
 hf-proxy/src/hf_proxy/hf_tokenizer.py | 18 ++++++++++++++----
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/hf-proxy/src/hf_proxy/hf_adapter.py b/hf-proxy/src/hf_proxy/hf_adapter.py
index fa89ac4..3b12124 100644
--- a/hf-proxy/src/hf_proxy/hf_adapter.py
+++ b/hf-proxy/src/hf_proxy/hf_adapter.py
@@ -99,7 +99,11 @@ class HFGPTAdapter(PreTrainedModel):
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
 
         # Main forward pass
-        logits = self.llm_model(input_ids)
+        outputs = self.llm_model(input_ids)
+        if isinstance(outputs, tuple):
+            logits = outputs[0]
+        else:
+            logits = outputs
 
         loss = None
         if labels is not None:
diff --git a/hf-proxy/src/hf_proxy/hf_tokenizer.py b/hf-proxy/src/hf_proxy/hf_tokenizer.py
index 3a1df1a..34d66aa 100644
--- a/hf-proxy/src/hf_proxy/hf_tokenizer.py
+++ b/hf-proxy/src/hf_proxy/hf_tokenizer.py
@@ -56,10 +56,20 @@ class HFTokenizerAdapter:
         add_special_tokens = kwargs.get('add_special_tokens', True)
 
         # Encode the text
-        input_ids = self.llm_tokenizer.encode(
-            text,
-            add_special_tokens=add_special_tokens
-        )
+        if isinstance(text, str):
+            input_ids = self.llm_tokenizer.encode(
+                text,
+                add_special_tokens=add_special_tokens
+            )
+            input_ids = [input_ids]  # wrap the single sequence in a one-element batch
+        else:
+            # A list of strings: encode element-wise (batch mode)
+            input_ids = [
+                self.llm_tokenizer.encode(
+                    t,
+                    add_special_tokens=add_special_tokens
+                ) for t in text
+            ]
 
         # Apply truncation
         if truncation and max_length is not None and len(input_ids) > max_length:
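
Note on the hf_adapter change: some model backends return a bare logits
tensor from their forward call, while others return a tuple whose first
element is the logits (for example (logits, hidden_states)). A minimal
standalone sketch of the same unwrapping pattern, assuming only that the
first tuple element is the logits tensor:

    import torch

    def extract_logits(outputs):
        """Accept either a bare logits tensor or a (logits, ...) tuple."""
        return outputs[0] if isinstance(outputs, tuple) else outputs

    logits = torch.randn(2, 8, 32000)  # (batch, seq_len, vocab_size)
    assert extract_logits(logits) is logits          # bare tensor passes through
    assert extract_logits((logits, None)) is logits  # tuple is unwrapped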
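
Note on the hf_tokenizer change: encode now always yields a batch (a list
of token-id lists) whether the input is a single string or a list of
strings, so downstream code can index sequences uniformly. One consequence
is that the pre-existing check len(input_ids) > max_length now measures the
number of sequences rather than a sequence's length; per-sequence
truncation would look like the following sketch (hypothetical, not part of
this patch):

    # Clip each encoded sequence to max_length tokens; input_ids is a
    # list of token-id lists after the batching change above.
    if truncation and max_length is not None:
        input_ids = [ids[:max_length] for ids in input_ids]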