Using aggregator metric file for summary (#1239)
Signed-off-by: noopur <[email protected]>
noopurintel authored Jan 3, 2025
1 parent e4305f7 commit ca94e42
Showing 4 changed files with 70 additions and 70 deletions.
3 changes: 3 additions & 0 deletions tests/end_to_end/models/model_owner.py
@@ -145,6 +145,9 @@ def modify_plan(self, param_config, plan_path):
         # Memory Leak related
         data["aggregator"]["settings"]["log_memory_usage"] = self.log_memory_usage
         data["collaborator"]["settings"]["log_memory_usage"] = self.log_memory_usage
+        # Aggregator and collaborators metric logging related
+        data["aggregator"]["settings"]["write_logs"] = True
+        data["collaborator"]["settings"]["write_logs"] = True

         data["data_loader"]["settings"]["collaborator_count"] = int(self.num_collaborators)
         data["network"]["settings"]["require_client_auth"] = param_config.require_client_auth
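For context, the effect of the two new `write_logs` flags can be sketched as a plain YAML round-trip. This is illustrative only: the `plan/plan.yaml` path and the use of PyYAML are assumptions for the sketch, not details taken from `modify_plan` itself.

import yaml

# Sketch: enable metric logging in an OpenFL-style plan file.
# The path and the loader are assumed for illustration.
with open("plan/plan.yaml") as f:
    data = yaml.safe_load(f)

data["aggregator"]["settings"]["write_logs"] = True
data["collaborator"]["settings"]["write_logs"] = True

with open("plan/plan.yaml", "w") as f:
    yaml.safe_dump(data, f)

With these flags on, the aggregator and collaborators write their metrics to files such as `logs/aggregator_metrics.txt`, which the updated `summary_helper.py` below reads instead of scraping `aggregator.log`.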
25 changes: 3 additions & 22 deletions tests/end_to_end/test_suites/memory_logs_tests.py
@@ -4,12 +4,11 @@
 import pytest
 import logging
 import os
-import json

 from tests.end_to_end.utils.common_fixtures import fx_federation_tr, fx_federation_tr_dws
 import tests.end_to_end.utils.constants as constants
 from tests.end_to_end.utils import federation_helper as fed_helper, ssh_helper as ssh
-from tests.end_to_end.utils.generate_report import generate_memory_report
+from tests.end_to_end.utils.generate_report import generate_memory_report, convert_to_json

 log = logging.getLogger(__name__)

@@ -79,7 +78,7 @@ def _log_memory_usage(request, fed_obj):
), "Aggregator memory usage file is not available"

# Log the aggregator memory usage details
memory_usage_dict = _convert_to_json(aggregator_memory_usage_file)
memory_usage_dict = convert_to_json(aggregator_memory_usage_file)
aggregator_path = os.path.join(fed_obj.workspace_path, "aggregator")
generate_memory_report(memory_usage_dict, aggregator_path)

@@ -101,7 +100,7 @@ def _log_memory_usage(request, fed_obj):
             collaborator_memory_usage_file
         ), f"Memory usage file for collaborator {collaborator.collaborator_name} is not available"

-        memory_usage_dict = _convert_to_json(collaborator_memory_usage_file)
+        memory_usage_dict = convert_to_json(collaborator_memory_usage_file)
         collaborator_path = os.path.join(fed_obj.workspace_path, collaborator.name)
         generate_memory_report(memory_usage_dict, collaborator_path)

@@ -110,21 +109,3 @@ def _log_memory_usage(request, fed_obj):
), f"Memory usage details are not available for all rounds for collaborator {collaborator.collaborator_name}"

log.info("Memory usage details are available for all participants")


def _convert_to_json(file):
"""
Reads a file containing JSON objects, one per line, and converts them into a list of parsed JSON objects.
Args:
file (str): The path to the file containing JSON objects.
Returns:
list: A list of parsed JSON objects.
"""
with open(file, 'r') as infile:
json_objects = infile.readlines()

# Parse each JSON object
parsed_json_objects = [json.loads(obj) for obj in json_objects]
return parsed_json_objects
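For reference, `generate_memory_report` (changed below) presumably turns this parsed list into a pandas DataFrame, so each line of the memory-usage file should be a JSON object carrying at least the two columns the report code indexes. A hypothetical sketch of that shape, with invented values:

# Hypothetical input; only the two keys used by generate_memory_report are shown.
memory_usage_dict = [
    {"round_number": 0, "virtual_memory/used": 1024.5},
    {"round_number": 1, "virtual_memory/used": 1031.2},
]
# generate_memory_report(memory_usage_dict, workspace_path) then plots
# virtual_memory/used per round and writes mem_stats.txt plus a PDF report.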
80 changes: 53 additions & 27 deletions tests/end_to_end/utils/generate_report.py
@@ -1,22 +1,27 @@
 # Copyright 2020-2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

 import pandas as pd
 import matplotlib.pyplot as plt
 import numpy as np
+import json
 from scipy.stats import linregress
 from fpdf import FPDF


 class PDF(FPDF):
     def header(self):
-        self.set_font('Arial', 'B', 14)
+        self.set_font("Arial", "B", 14)

     def chapter_title(self, title):
         self.add_page()
-        self.set_font('Arial', 'B', 14) # Set font to bold for title
-        self.cell(0, 10, title, 0, 1, 'L')
+        self.set_font("Arial", "B", 14) # Set font to bold for title
+        self.cell(0, 10, title, 0, 1, "L")

     def chapter_body(self, body):
-        self.set_font('Arial', '', 12)
+        self.set_font("Arial", "", 12)
         self.multi_cell(0, 10, body)


 def generate_memory_report(memory_usage_dict, workspace_path):
     """
     Generates a memory usage report from a CSV file.
@@ -32,25 +37,25 @@ def generate_memory_report(memory_usage_dict, workspace_path):

     # Plotting the chart
     plt.figure(figsize=(10, 5))
-    plt.plot(data['round_number'], data['virtual_memory/used'], marker='o')
-    plt.title('Memory Usage per Round')
-    plt.xlabel('round_number')
-    plt.ylabel('Virtual Memory Used (MB)')
+    plt.plot(data["round_number"], data["virtual_memory/used"], marker="o")
+    plt.title("Memory Usage per Round")
+    plt.xlabel("round_number")
+    plt.ylabel("Virtual Memory Used (MB)")
     plt.grid(True)
     output_path = f"{workspace_path}/mem_usage_plot.png"
     plt.savefig(output_path)
     plt.close()

     # Calculate statistics
-    min_mem = round(data['virtual_memory/used'].min(), 2)
-    max_mem = round(data['virtual_memory/used'].max(), 2)
-    mean_mem = round(data['virtual_memory/used'].mean(), 2)
-    variance_mem = round(data['virtual_memory/used'].var(), 2)
-    std_dev_mem = round(data['virtual_memory/used'].std(), 2)
-    slope, _, _, _, _ = linregress(data.index, data['virtual_memory/used'])
+    min_mem = round(data["virtual_memory/used"].min(), 2)
+    max_mem = round(data["virtual_memory/used"].max(), 2)
+    mean_mem = round(data["virtual_memory/used"].mean(), 2)
+    variance_mem = round(data["virtual_memory/used"].var(), 2)
+    std_dev_mem = round(data["virtual_memory/used"].std(), 2)
+    slope, _, _, _, _ = linregress(data.index, data["virtual_memory/used"])
     slope = round(slope, 2)
     stats_path = f"{workspace_path}/mem_stats.txt"
-    with open(stats_path, 'w') as file:
+    with open(stats_path, "w") as file:
         file.write(f"Minimum Memory Used: {min_mem} MB\n")
         file.write(f"Maximum Memory Used: {max_mem} MB\n")
         file.write(f"Mean Memory Used: {mean_mem} MB\n")
@@ -69,33 +74,54 @@ def generate_memory_report(memory_usage_dict, workspace_path):

print("Memory report generation completed. Report saved to:", pdf_output_path)


def add_introduction(pdf):
pdf.chapter_title('Introduction')
intro_text = ("The purpose of this memory analysis is to identify memory usage trends and potential bottlenecks. "
"This analysis focuses on the relationship between round information and memory usage.")
pdf.chapter_title("Introduction")
intro_text = (
"The purpose of this memory analysis is to identify memory usage trends and potential bottlenecks. "
"This analysis focuses on the relationship between round information and memory usage."
)
pdf.chapter_body(intro_text)


def add_chart_analysis(pdf, output_path, data):
pdf.chapter_title('Chart Analysis')
pdf.chapter_title("Chart Analysis")
pdf.image(output_path, w=180)
diffs = data['virtual_memory/used'].diff().round(2)
diffs = data["virtual_memory/used"].diff().round(2)
significant_changes = diffs[diffs.abs() > 500]
for index, value in significant_changes.items():
pdf.chapter_body(f"Significant memory change: {value} MB at Round {data['round_number'][index]}")
pdf.chapter_body(
f"Significant memory change: {value} MB at Round {data['round_number'][index]}"
)


def add_statistical_overview(pdf, stats_path):
pdf.chapter_title('Statistical Overview')
with open(stats_path, 'r') as file:
pdf.chapter_title("Statistical Overview")
with open(stats_path, "r") as file:
stats = file.read()
pdf.chapter_body(stats)


def add_conclusion(pdf, slope):
pdf.chapter_title('Conclusion')
pdf.chapter_title("Conclusion")
if slope > 0:
conclusion_text = "The upward slope in the graph indicates a trend of increasing memory usage over rounds."
else:
conclusion_text = "There is no continuous memory growth."
pdf.chapter_body(conclusion_text)

# Uncomment the following line to run the function directly when this script is executed
# generate_memory_report('/home/sys_tpe_st_svc_acct/memory_leak/mem_info_aggr.csv')

def convert_to_json(file):
"""
Reads a file containing JSON objects, one per line, and converts them into a list of parsed JSON objects.
Args:
file (str): The path to the file containing JSON objects.
Returns:
list: A list of parsed JSON objects.
"""
with open(file, "r") as infile:
json_objects = infile.readlines()

# Parse each JSON object
parsed_json_objects = [json.loads(obj) for obj in json_objects]
return parsed_json_objects
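A quick usage sketch for the relocated `convert_to_json` helper. The sample lines below are hypothetical (the real file is produced by the aggregator when `write_logs` is enabled), but the flat `origin/task/metric` key mirrors the one read in `summary_helper.py` below:

import json
from tests.end_to_end.utils.generate_report import convert_to_json

# Write a hypothetical metrics file: one JSON object per line (JSON Lines).
with open("/tmp/aggregator_metrics.txt", "w") as f:
    f.write(json.dumps({"round": 0, "aggregator/aggregated_model_validation/accuracy": 0.16}) + "\n")
    f.write(json.dumps({"round": 1, "aggregator/aggregated_model_validation/accuracy": 0.42}) + "\n")

metrics = convert_to_json("/tmp/aggregator_metrics.txt")
print(metrics[-1].get("aggregator/aggregated_model_validation/accuracy", "Not Found"))  # 0.42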
32 changes: 11 additions & 21 deletions tests/end_to_end/utils/summary_helper.py
@@ -7,6 +7,7 @@
 from pathlib import Path

 import tests.end_to_end.utils.constants as constants
+from tests.end_to_end.utils.generate_report import convert_to_json

 # Initialize the XML parser
 parser = etree.XMLParser(recover=True, encoding="utf-8")
@@ -38,26 +39,10 @@ def get_aggregated_accuracy(agg_log_file):
         )
         return agg_accuracy

-    # Example line(s) containing spaces and special characters:
-    """
-    METRIC {'metric_origin': 'aggregator', 'task_name': 'aggregated_model_validation', 'metric_name': 'accuracy', 'metric_value': aggregator.py:933
-    0.15911591053009033, 'round': 0}
-    """
-    try:
-        with open(agg_log_file, 'r') as f:
-            for line in f:
-                if "'metric_origin': 'aggregator'" in line and "aggregated_model_validation" in line:
-                    # In Python versions < 3.11, aggregator.py file name appears in the line
-                    # whereas in Python version 3.11, it is utils.py
-                    line = line.split("aggregator.py:")[0].strip()
-                    line = line.split("utils.py:")[0].strip()
-                    # If the line does not contain closing bracket "}", then concatenate the next line
-                    reqd_line = line if "}" in line else line + next(f).strip()
-                    agg_accuracy = eval(reqd_line.split("METRIC")[1].strip('"'))["metric_value"]
-    except Exception as e:
-        # Do not fail the test if the accuracy cannot be fetched
-        print(f"Error while reading aggregator log file: {e}")
-
+    agg_accuracy_dict = convert_to_json(agg_log_file)
+    agg_accuracy = agg_accuracy_dict[-1].get(
+        "aggregator/aggregated_model_validation/accuracy", "Not Found"
+    )
     return agg_accuracy
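Note that `agg_accuracy_dict[-1]` takes the last metric line, i.e. the final round, and `.get` only guards against a missing key, not an empty file. A hedged sketch of a more defensive variant (the helper name is illustrative, not part of this commit):

def get_last_metric(entries, key, default="Not Found"):
    # Return `key` from the last parsed metric entry, tolerating an empty list.
    if not entries:
        return default
    return entries[-1].get(key, default)

# e.g. get_last_metric(convert_to_json(agg_log_file),
#                      "aggregator/aggregated_model_validation/accuracy")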


@@ -153,7 +138,12 @@ def main():

     # Assumption - result directory is present in the home directory
     agg_log_file = os.path.join(
-        result_path, model_name, "aggregator", "workspace", "aggregator.log"
+        result_path,
+        model_name,
+        "aggregator",
+        "workspace",
+        "logs",
+        "aggregator_metrics.txt",
     )
     agg_accuracy = get_aggregated_accuracy(agg_log_file)

