From 6dd778ed705e7390d9870909b1fce941326c79fc Mon Sep 17 00:00:00 2001 From: zrguo <49157727+LarFii@users.noreply.github.com> Date: Sun, 3 Nov 2024 17:53:53 +0800 Subject: [PATCH] Update utils.py --- lightrag/utils.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/lightrag/utils.py b/lightrag/utils.py index 7b17cbb6..254f5dad 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -176,11 +176,6 @@ def truncate_list_by_token_size(list_data: list, key: callable, max_token_size: return list_data[:i] return list_data - -# def list_of_list_to_csv(data: list[list]): -# return "\n".join( -# [",\t".join([str(data_dd) for data_dd in data_d]) for data_d in data] -# ) def list_of_list_to_csv(data: List[List[str]]) -> str: output = io.StringIO() writer = csv.writer(output) @@ -258,12 +253,11 @@ def xml_to_json(xml_file): print(f"An error occurred: {e}") return None -#混合检索中的合并函数 def process_combine_contexts(hl, ll): header = None list_hl = csv_string_to_list(hl.strip()) list_ll = csv_string_to_list(ll.strip()) - # 去掉第一个元素(如果不为空) + if list_hl: header=list_hl[0] list_hl = list_hl[1:] @@ -272,24 +266,21 @@ def process_combine_contexts(hl, ll): list_ll = list_ll[1:] if header is None: return "" - # 去掉每个子元素中的第一个元素(如果不为空),再转为一维数组,用于合并去重 + if list_hl: list_hl = [','.join(item[1:]) for item in list_hl if item] if list_ll: list_ll = [','.join(item[1:]) for item in list_ll if item] - # 合并并去重 combined_sources_set = set( filter(None, list_hl + list_ll) ) - # 创建包含头部的新列表 combined_sources = [",\t".join(header)] - # 为 combined_sources_set 中的每个元素添加自增数字 + for i, item in enumerate(combined_sources_set, start=1): combined_sources.append(f"{i},\t{item}") - # 将列表转换为字符串,子元素之间用换行符分隔 combined_sources = "\n".join(combined_sources) return combined_sources