From ba256144236deaf1bb8c46badd134263410a39ad Mon Sep 17 00:00:00 2001 From: Sebastian Schramm Date: Thu, 28 Nov 2024 14:28:29 +0100 Subject: [PATCH] fix templating of language in prompts --- lightrag/operate.py | 9 ++++++++- lightrag/prompt.py | 6 +++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index c761519f..94cd412b 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -59,6 +59,9 @@ async def _handle_entity_relation_summary( llm_max_tokens = global_config["llm_model_max_token_size"] tiktoken_model_name = global_config["tiktoken_model_name"] summary_max_tokens = global_config["entity_summary_to_max_tokens"] + language = global_config["addon_params"].get( + "language", PROMPTS["DEFAULT_LANGUAGE"] + ) tokens = encode_string_by_tiktoken(description, model_name=tiktoken_model_name) if len(tokens) < summary_max_tokens: # No need for summary @@ -70,6 +73,7 @@ async def _handle_entity_relation_summary( context_base = dict( entity_name=entity_or_relation_name, description_list=use_description.split(GRAPH_FIELD_SEP), + language=language, ) use_prompt = prompt_template.format(**context_base) logger.debug(f"Trigger summary: {entity_or_relation_name}") @@ -444,6 +448,9 @@ async def kg_query( ) else: examples = "\n".join(PROMPTS["keywords_extraction_examples"]) + language = global_config["addon_params"].get( + "language", PROMPTS["DEFAULT_LANGUAGE"] + ) # Set mode if query_param.mode not in ["local", "global", "hybrid"]: @@ -453,7 +460,7 @@ async def kg_query( # LLM generate keywords use_model_func = global_config["llm_model_func"] kw_prompt_temp = PROMPTS["keywords_extraction"] - kw_prompt = kw_prompt_temp.format(query=query, examples=examples) + kw_prompt = kw_prompt_temp.format(query=query, examples=examples, language=language) result = await use_model_func(kw_prompt) logger.info("kw_prompt result:") print(result) diff --git a/lightrag/prompt.py b/lightrag/prompt.py index 0d4e599d..5e71c081 100644 --- a/lightrag/prompt.py +++ b/lightrag/prompt.py @@ -33,7 +33,7 @@ 3. Identify high-level key words that summarize the main concepts, themes, or topics of the entire text. These should capture the overarching ideas present in the document. Format the content-level key words as ("content_keywords"{tuple_delimiter}) -4. Return output in English as a single list of all the entities and relationships identified in steps 1 and 2. Use **{record_delimiter}** as the list delimiter. +4. Return output in {language} as a single list of all the entities and relationships identified in steps 1 and 2. Use **{record_delimiter}** as the list delimiter. 5. When finished, output {completion_delimiter} @@ -131,7 +131,7 @@ Please concatenate all of these into a single, comprehensive description. Make sure to include information collected from all the descriptions. If the provided descriptions are contradictory, please resolve the contradictions and provide a single, coherent summary. Make sure it is written in third person, and include the entity names so we the have full context. -Use Chinese as output language. +Use {language} as output language. ####### -Data- @@ -178,7 +178,7 @@ PROMPTS["keywords_extraction"] = """---Role--- You are a helpful assistant tasked with identifying both high-level and low-level keywords in the user's query. -Use Chinese as output language. +Use {language} as output language. ---Goal---