Source code for mesalab.analyzis.mesa_analyzer

import os
import pandas as pd
import numpy as np
import yaml
import logging
from tqdm import tqdm
import sys

# --- Project imports ---
# Each helper is imported from the module that actually defines it.
from mesalab.analyzis.data_reader import scan_mesa_runs, get_data_from_history_file
from mesalab.bluelooptools.blue_loop_analyzer import analyze_blue_loop_and_instability
from mesalab.rsptools import generate_mesa_rsp_inlists
from mesalab.io.output_manager import create_output_directories
from mesalab.io.inlist_parser import get_mesa_params_from_inlist # NEW: Import to get initial_y

# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)

[docs] def perform_mesa_analysis(args, analysis_results_sub_dir, detail_files_output_dir, gyre_input_csv_name: str = 'sorted_blue_loop_profiles.csv', rsp_output_subdir: str = None): """ Coordinates the analysis of MESA runs, including blue loop analysis, data aggregation, and saving summary and detailed results. Args: args (argparse.Namespace): Command-line arguments containing input_dir, inlist_name, analyze_blue_loop, blue_loop_output_type, force_reanalysis. analysis_results_sub_dir (str): Path to the directory for summary/cross-grid CSVs. detail_files_output_dir (str): Path to the directory for detailed blue loop CSVs. gyre_input_csv_name (str): The desired filename for the CSV containing profiles information for the GYRE workflow. Defaults to 'sorted_blue_loop_profiles.csv'. rsp_output_subdir (str, optional): Base directory where MESA RSP inlists should be saved. Defaults to None, in which case the RSP inlists will be generated relative to the original MESA run directories. Returns: tuple: A tuple containing: - pd.DataFrame: The main summary DataFrame of analysis results. - pd.DataFrame: A combined DataFrame of detailed blue loop data for plotting (combined_detail_data_for_plotting). - dict: A dictionary where keys are metallicities (Z) and values are lists of full, untrimmed history DataFrames for plotting (full_history_data_for_plotting). - str: The full path to the generated GYRE input CSV file. Returns an empty string if the CSV was not generated. - list: A list of paths to the generated RSP inlist files. """ # Directory creation logic is now handled by output_manager.create_output_directories # The `create_output_directories` function from `output_manager.py` # should be called in `cli.py` *before* calling `perform_mesa_analysis`. # This function `perform_mesa_analysis` then receives the *already created* # paths. 
skipped_runs_log_path = os.path.join(analysis_results_sub_dir, "skipped_runs_log.txt") # Clear old skipped runs log if reanalysis is forced if args.general_settings.force_reanalysis and os.path.exists(skipped_runs_log_path): try: os.remove(skipped_runs_log_path) logger.info(f"Removed old skipped runs log: {skipped_runs_log_path}") except OSError as e: logger.warning(f"Could not remove old skipped runs log {skipped_runs_log_path}: {e}") input_dir = args.general_settings.input_dir inlist_name = args.general_settings.inlist_name analyze_blue_loop = args.blue_loop_analysis.analyze_blue_loop blue_loop_output_type = args.blue_loop_analysis.blue_loop_output_type force_reanalysis = args.general_settings.force_reanalysis # These paths should ideally come from output_manager.get_analysis_file_paths in cli.py # or be passed into this function. For now, they are redefined here for a self-contained # update, but consider refactoring if these are also managed by output_manager. summary_csv_path = os.path.join(analysis_results_sub_dir, "summary_results.csv") # cross_csv_path will now be generated inside the loop for each Y value, # so we'll store the paths in a list if needed to return multiple. # For now, we'll keep `cross_csv_path` as a base for generating the filenames. base_cross_csv_path = os.path.join(analysis_results_sub_dir, "crossing_count_grid") # Initialize gyre_input_csv_name_from_config gyre_input_csv_name_from_config = None if args.gyre_workflow.run_gyre_workflow: gyre_input_csv_name_from_config = args.gyre_workflow.filtered_profiles_csv_name if gyre_input_csv_name_from_config is None: logger.warning("GYRE workflow enabled but 'filtered_profiles_csv_name' not specified in config. 
Using 'sorted_blue_loop_profiles.csv' as default.") gyre_input_csv_name_from_config = "sorted_blue_loop_profiles.csv" gyre_input_csv_path = None if gyre_input_csv_name_from_config is not None: gyre_input_csv_path = os.path.join(analysis_results_sub_dir, gyre_input_csv_name_from_config) else: logger.info("GYRE input CSV path not initialized as GYRE workflow is disabled or name is missing.") # Reanalysis logic needs to be updated to account for multiple cross-grid files # For simplicity, we'll assume if *any* cross-grid exists, and force_reanalysis is False, # we try to load. A more robust solution might check all expected Y-specific cross-grids. reanalysis_needed = force_reanalysis or \ not os.path.exists(summary_csv_path) # We can't easily check for all potential cross_csv_paths here without knowing unique_ys beforehand. # So, if reanalysis_needed is false based on summary_csv, we'll proceed, but cross-grids will be # generated if they don't exist later. detail_csvs_exist = True if analyze_blue_loop: if not os.path.exists(detail_files_output_dir) or not os.listdir(detail_files_output_dir): detail_csvs_exist = False if not force_reanalysis: logger.info(f"Detailed blue loop CSVs not found in '{detail_files_output_dir}'. Forcing reanalysis to generate them for plotting.") reanalysis_needed = True logger.info(f"Analysis started. Full reanalysis needed: {reanalysis_needed}") summary_df = pd.DataFrame() combined_detail_data_for_plotting = pd.DataFrame() full_history_data_for_plotting = {} gyre_output_csv_path_returned = "" generated_rsp_inlists_paths = [] generated_cross_csv_paths = [] # NEW: To store paths of generated cross-grids if not reanalysis_needed: logger.info("Summary and cross-grid CSV files already exist. 
Attempting to load existing data.") try: # Load summary_df loaded_summary_df = pd.read_csv(summary_csv_path) # Load as regular DataFrame first # Check for initial_Y in columns before setting index index_cols = ['initial_Z', 'initial_mass'] if 'initial_Y' in loaded_summary_df.columns: index_cols.insert(1, 'initial_Y') # Insert Y after Z if all(col in loaded_summary_df.columns for col in index_cols): loaded_summary_df.set_index(index_cols, inplace=True) else: logger.warning(f"Loaded summary CSV is missing one or more expected index columns ({index_cols}). Cannot set index properly. Reanalysis might be needed.") reanalysis_needed = True # Force reanalysis if index columns are missing if not reanalysis_needed: # Only proceed if index setting was successful or not needed if analyze_blue_loop: filtered_loaded_summary_df = loaded_summary_df[ (loaded_summary_df['blue_loop_crossing_count'].notna()) & (loaded_summary_df['blue_loop_crossing_count'] > 0) ].copy() else: filtered_loaded_summary_df = loaded_summary_df.copy() logger.info(f"Blue loop analysis is OFF. All entries from '{summary_csv_path}' will be considered for GYRE input.") if filtered_loaded_summary_df.empty and not loaded_summary_df.empty: logger.warning("Loaded summary CSV contained no valid blue loop entries after filtering. Forcing reanalysis if blue loop analysis is on.") if analyze_blue_loop: reanalysis_needed = True else: logger.info("No successful MESA runs found in loaded summary CSV. Cannot generate GYRE input.") return pd.DataFrame(), pd.DataFrame(), {}, "", [], [] # Return empty list for cross_csv_paths elif filtered_loaded_summary_df.empty and loaded_summary_df.empty: logger.info("Loaded summary CSV was empty. 
No valid entries found.") return pd.DataFrame(), pd.DataFrame(), {}, "", [], [] # Return empty list for cross_csv_paths else: summary_df = filtered_loaded_summary_df logger.info("Successfully loaded and filtered existing summary CSV.") if args.gyre_workflow.get('run_gyre_workflow', False) and gyre_input_csv_path: if os.path.exists(gyre_input_csv_path): try: gyre_input_df_loaded = pd.read_csv(gyre_input_csv_path) logger.info(f"Successfully loaded existing GYRE input CSV from {gyre_input_csv_path}") gyre_output_csv_path_returned = gyre_input_csv_path except Exception as e: logger.error(f"Failed to load existing GYRE input CSV from {gyre_input_csv_path}: {e}") logger.exception("GYRE input CSV loading exception details:") gyre_output_csv_path_returned = "" else: logger.warning(f"GYRE workflow enabled, but existing GYRE input CSV '{gyre_input_csv_name_from_config}' not found at {gyre_input_csv_path}. It will not be generated unless a full reanalysis is triggered.") gyre_output_csv_path_returned = "" else: logger.info("GYRE workflow is disabled or GYRE input CSV path not defined. 
Skipping loading of existing GYRE input CSV.") gyre_output_csv_path_returned = "" if not reanalysis_needed: logger.info("Detail data not in memory; attempting to load from disk for plotting...") combined_detail_dfs = [] if os.path.exists(detail_files_output_dir): logger.info(f"Loading CSV files from '{detail_files_output_dir}'...") for f_name in os.listdir(detail_files_output_dir): if f_name.endswith(".csv"): try: df = pd.read_csv(os.path.join(detail_files_output_dir, f_name)) combined_detail_dfs.append(df) except Exception as e: logger.warning(f"Failed to load detail CSV '{f_name}': {e}") if combined_detail_dfs: combined_detail_data_for_plotting = pd.concat(combined_detail_dfs, ignore_index=True) logger.info(f"Successfully loaded {len(combined_detail_dfs)} detail CSVs.") # Ensure sorting by initial_Y as well if it's in the data sort_cols = ['initial_Z', 'initial_mass', 'star_age'] if 'initial_Y' in combined_detail_data_for_plotting.columns: sort_cols.insert(1, 'initial_Y') # Insert Y after Z combined_detail_data_for_plotting = combined_detail_data_for_plotting.sort_values( by=sort_cols ).reset_index(drop=True) else: logger.error(f"No CSV files loaded from '{detail_files_output_dir}'.") else: logger.error(f"Detail files output directory '{detail_files_output_dir}' does not exist.") # Also try to load existing cross-grid CSVs. # This will require scanning for files matching the new naming convention. if os.path.exists(analysis_results_sub_dir): for fname in os.listdir(analysis_results_sub_dir): if fname.startswith("crossing_count_grid") and fname.endswith(".csv"): generated_cross_csv_paths.append(os.path.join(analysis_results_sub_dir, fname)) if not generated_cross_csv_paths: logger.warning("No existing cross-grid CSVs found. Full reanalysis might be needed to generate them.") reanalysis_needed = True # If we didn't find them, force reanalysis to generate. 
# This return needs to be inside the "if not reanalysis_needed" block to actually return if not reanalysis_needed: return summary_df, combined_detail_data_for_plotting, full_history_data_for_plotting, gyre_output_csv_path_returned, generated_rsp_inlists_paths, generated_cross_csv_paths # NEW: return list of cross_csv_paths except FileNotFoundError: logger.warning(f"Existing summary or cross-grid CSVs not found. Forcing full reanalysis.") reanalysis_needed = True except Exception as e: logger.error(f"Error loading existing summary CSV: {e}. Forcing full reanalysis.") logger.exception("Error details:") reanalysis_needed = True if reanalysis_needed: logger.info("Starting full analysis of MESA runs...") mesa_run_infos = scan_mesa_runs(input_dir, inlist_name) if not mesa_run_infos: logger.info("No MESA runs found for full analysis. Returning empty DataFrames.") return pd.DataFrame(), pd.DataFrame(), {}, "", [], [] # Return empty list for cross_csv_paths # Extract all unique parameters for index/columns unique_masses = sorted(set(run['mass'] for run in mesa_run_infos)) unique_zs = sorted(set(run['z'] for run in mesa_run_infos)) unique_ys = sorted(set(run['y'] for run in mesa_run_infos)) # NEW: Get unique Y values if not unique_masses or not unique_zs or not unique_ys: # NEW: Check Y as well logger.error("Error: Could not determine unique masses, metallicities, or helium abundances from runs. Returning empty DataFrames.") return pd.DataFrame(), pd.DataFrame(), {}, "", [], [] # Return empty list for cross_csv_paths # We will generate `cross_data_matrix` per Y value later. 
# cross_data_matrix = pd.DataFrame(np.nan, index=unique_zs, columns=unique_masses) # cross_data_matrix.index.name = "Z" # cross_data_matrix.columns.name = "Mass" summary_data = [] # Group detailed data by Z and Y grouped_detailed_dfs_for_analysis_raw = {z_val: {y_val: [] for y_val in unique_ys} for z_val in unique_zs} full_history_data_for_plotting = {z_val: {y_val: [] for y_val in unique_ys} for z_val in unique_zs} # Nested dict for Z and Y yaml_data = {} for run_info in sorted(mesa_run_infos, key=lambda x: (x['z'], x['y'], x['mass'])): # Sort by Y too z_key = f"Z_{run_info['z']:.4f}" y_key = f"Y_{run_info['y']:.3f}" # NEW: Y key for YAML if z_key not in yaml_data: yaml_data[z_key] = {} if y_key not in yaml_data[z_key]: # NEW: Nested Y under Z in YAML yaml_data[z_key][y_key] = {} mass_key = f"M_{run_info['mass']:.1f}" yaml_data[z_key][y_key][mass_key] = { # Store under Z, Y, Mass 'run_directory': os.path.basename(run_info['run_dir_path']), 'history_file': os.path.basename(run_info['history_file_path']), } yaml_file_path = os.path.join(analysis_results_sub_dir, "processed_runs_overview.yaml") try: with open(yaml_file_path, 'w') as f: yaml.dump(yaml_data, f, indent=4, sort_keys=False) logger.info(f"Generated YAML overview of processed runs: {yaml_file_path}") except Exception as e: logger.error(f"Could not write YAML overview file: {e}") if os.path.exists(skipped_runs_log_path): os.remove(skipped_runs_log_path) # Clear previous log total_runs_for_analysis = len(mesa_run_infos) with tqdm(total=total_runs_for_analysis, desc="Performing MESA Run Analysis") as pbar: for run_info in mesa_run_infos: current_mass = run_info['mass'] current_z = run_info['z'] current_y = run_info['y'] history_file_path = run_info['history_file_path'] run_dir_path = run_info['run_dir_path'] analysis_result_summary = { 'initial_mass': current_mass, 'initial_Z': current_z, 'initial_Y': current_y, 'run_dir_path': run_dir_path, 'blue_loop_crossing_count': np.nan, 'min_log_L': np.nan, 
'min_log_Teff': np.nan,'min_log_R': np.nan, 'max_log_L': np.nan, 'max_log_Teff': np.nan,'max_log_R': np.nan, 'first_model_number': np.nan, 'last_model_number': np.nan, 'first_age_yr': np.nan, 'last_age_yr': np.nan, 'blue_loop_start_age': np.nan, 'blue_loop_end_age': np.nan, 'instability_start_age': np.nan, 'instability_end_age': np.nan, 'calculated_blue_loop_duration': np.nan, 'calculated_instability_duration': np.nan, 'analysis_status': 'Success' # Default status } current_detail_df = pd.DataFrame() try: df_full_history = get_data_from_history_file(history_file_path) df_full_history['initial_mass'] = current_mass df_full_history['initial_Z'] = current_z df_full_history['initial_Y'] = current_y df_full_history['run_dir_path'] = run_dir_path if current_z not in full_history_data_for_plotting: full_history_data_for_plotting[current_z] = {} # Ensure nested dict if current_y not in full_history_data_for_plotting[current_z]: # Ensure nested dict full_history_data_for_plotting[current_z][current_y] = [] full_history_data_for_plotting[current_z][current_y].append(df_full_history.copy()) # Store by Z and Y if analyze_blue_loop: analyzer_output = analyze_blue_loop_and_instability(df_full_history, current_mass, current_z, current_y) if not analyzer_output['blue_loop_detail_df'].empty: bl_df = analyzer_output['blue_loop_detail_df'].copy() bl_df['run_dir_path'] = run_dir_path # Ensure run_dir_path is also in bl_df for consistency bl_df['initial_mass'] = current_mass # Add mass to detail df bl_df['initial_Z'] = current_z # Add Z to detail df bl_df['initial_Y'] = current_y # NEW: Add initial_Y to bl_df if blue_loop_output_type == 'all': current_detail_df = bl_df else: concise_detail_columns_local = [ 'initial_mass', 'initial_Z', 'initial_Y', 'star_age', 'model_number', 'log_Teff', 'log_L', 'log_g', 'profile_number', 'run_dir_path' ] existing_desired_cols = [col for col in concise_detail_columns_local if col in bl_df.columns] if existing_desired_cols: current_detail_df = 
bl_df[existing_desired_cols] else: logger.warning(f"No desired columns found for concise detail for M={current_mass}, Z={current_z}, Y={current_y}. Detail DF for plotting might remain empty.") current_detail_df = pd.DataFrame() else: logger.info(f"analyzer_output['blue_loop_detail_df'] was empty for M={current_mass}, Z={current_z}, Y={current_y}. No detailed data for this run.") if pd.notna(analyzer_output['crossing_count']): analysis_result_summary['blue_loop_crossing_count'] = int(analyzer_output['crossing_count']) if analysis_result_summary['blue_loop_crossing_count'] > 0: state_times = analyzer_output['state_times'] # These metrics are calcualted in the blueloop analyzer, so we just copy them analysis_result_summary['blue_loop_start_age'] = state_times.get('first_is_entry_age', np.nan) analysis_result_summary['blue_loop_end_age'] = state_times.get('last_is_exit_age', np.nan) analysis_result_summary['calculated_blue_loop_duration'] = analyzer_output.get('calculated_blue_loop_duration', np.nan) analysis_result_summary['instability_start_age'] = state_times.get('instability_start_age', np.nan) analysis_result_summary['instability_end_age'] = state_times.get('instability_end_age', np.nan) analysis_result_summary['calculated_instability_duration'] = analyzer_output.get('calculated_instability_duration', np.nan) if not current_detail_df.empty: analysis_result_summary['min_log_L'] = current_detail_df['log_L'].min() analysis_result_summary['min_log_Teff'] = current_detail_df['log_Teff'].min() if 'log_R' in current_detail_df.columns: analysis_result_summary['min_log_R'] = current_detail_df['log_R'].min() elif 'log_R' in df_full_history.columns: analysis_result_summary['min_log_R'] = df_full_history['log_R'].min() analysis_result_summary['max_log_L'] = current_detail_df['log_L'].max() analysis_result_summary['max_log_Teff'] = current_detail_df['log_Teff'].max() if 'log_R' in current_detail_df.columns: analysis_result_summary['max_log_R'] = 
current_detail_df['log_R'].max() elif 'log_R' in df_full_history.columns: analysis_result_summary['max_log_R'] = df_full_history['log_R'].max() analysis_result_summary['first_model_number'] = current_detail_df['model_number'].min() analysis_result_summary['last_model_number'] = current_detail_df['model_number'].max() analysis_result_summary['first_age_yr'] = current_detail_df['star_age'].min() analysis_result_summary['last_age_yr'] = current_detail_df['star_age'].max() else: logger.warning(f"current_detail_df is empty for M={current_mass}, Z={current_z}, Y={current_y} despite blue loop found (count > 0). Detailed summary metrics will be NaN.") else: logger.info(f"No blue loop found (0 crossings) for M={current_mass}, Z={current_z}, Y={current_y}. Blue loop summary metrics will be NaN.") else: logger.warning(f"Blue loop analysis failed for M={current_mass}, Z={current_z}, Y={current_y}. Blue loop summary metrics will be NaN.") current_detail_df = pd.DataFrame() else: logger.info(f"Skipping blue loop analysis for M={current_mass}, Z={current_z}, Y={current_y} as analyze_blue_loop is False.") # Add Y to message analysis_result_summary['blue_loop_crossing_count'] = np.nan current_detail_df = pd.DataFrame() summary_data.append(analysis_result_summary) if analyze_blue_loop and not current_detail_df.empty: # Ensure proper nested structure for grouped_detailed_dfs_for_analysis_raw if current_z not in grouped_detailed_dfs_for_analysis_raw: grouped_detailed_dfs_for_analysis_raw[current_z] = {} if current_y not in grouped_detailed_dfs_for_analysis_raw[current_z]: grouped_detailed_dfs_for_analysis_raw[current_z][current_y] = [] grouped_detailed_dfs_for_analysis_raw[current_z][current_y].append(current_detail_df) # This part ensures that `combined_detail_data_for_plotting` is built correctly # by concatenating all `current_detail_df` instances as they are processed. 
if combined_detail_data_for_plotting.empty: combined_detail_data_for_plotting = current_detail_df.copy() else: combined_detail_data_for_plotting = pd.concat([combined_detail_data_for_plotting, current_detail_df], ignore_index=True) except Exception as err: with open(skipped_runs_log_path, 'a') as log_file: log_file.write(f"Skipped run {run_info['run_dir_path']} due to error: {err}\n") logger.error(f"Skipped run {run_info['run_dir_path']} due to error: {err}") logger.exception(f"Exception details for run {run_info['run_dir_path']}:") # Append a summary entry even if there was an error, marking it as such error_summary = { 'initial_mass': current_mass, 'initial_Z': current_z, 'initial_Y': current_y, # Ensure Y is recorded even on error 'run_dir_path': run_dir_path, 'blue_loop_crossing_count': np.nan, 'min_log_L': np.nan, 'min_log_Teff': np.nan, 'min_log_R': np.nan, 'max_log_L': np.nan, 'max_log_Teff': np.nan, 'max_log_R': np.nan, 'first_model_number': np.nan, 'last_model_number': np.nan, 'first_age_yr': np.nan, 'last_age_yr': np.nan, 'blue_loop_start_age': np.nan, 'blue_loop_end_age': np.nan, 'instability_start_age': np.nan, 'instability_end_age': np.nan, 'calculated_blue_loop_duration': np.nan, 'calculated_instability_duration': np.nan, 'analysis_status': f"Error: {str(err)[:100]}" # Truncate error message } summary_data.append(error_summary) pbar.update(1) summary_df_raw = pd.DataFrame(summary_data) # Ensure 'initial_Y' is included in the sort and index for comprehensive results summary_df_raw.sort_values(['initial_Z', 'initial_Y', 'initial_mass'], inplace=True) summary_df_raw.set_index(['initial_Z', 'initial_Y', 'initial_mass'], inplace=True) # NEW: Set Y as part of the index if analyze_blue_loop: summary_df_to_save = summary_df_raw[ (summary_df_raw['blue_loop_crossing_count'].notna()) & (summary_df_raw['blue_loop_crossing_count'] > 0) ].copy() if summary_df_to_save.empty: logger.info("No valid blue loop entries found after filtering for summary CSV.") else: 
logger.info(f"Generated summary CSV will contain {len(summary_df_to_save)} blue loop entries.") else: summary_df_to_save = summary_df_raw.copy() logger.info(f"Blue loop analysis is OFF. Generated summary CSV will contain all {len(summary_df_to_save)} successful MESA runs.") if summary_df_to_save.empty: # Re-define columns including 'initial_Y' summary_df = pd.DataFrame(columns=[ 'initial_mass', 'initial_Z', 'initial_Y', 'run_dir_path', # Added initial_Y here 'blue_loop_crossing_count', 'blue_loop_start_age', 'blue_loop_end_age', 'instability_start_age', 'instability_end_age', 'calculated_blue_loop_duration', 'calculated_instability_duration', 'min_log_L', 'min_log_Teff', 'min_log_R', 'max_log_L', 'max_log_Teff', 'max_log_R', 'first_model_number', 'last_model_number', 'first_age_yr', 'last_age_yr', 'analysis_status' # Include analysis_status ], index=pd.MultiIndex.from_tuples([], names=['initial_Z', 'initial_Y', 'initial_mass'])) else: summary_df = summary_df_to_save.copy() if blue_loop_output_type == 'summary' and analyze_blue_loop: logger.info("Applying 'summary' output type filtering for summary CSV columns.") summary_columns_for_summary_output = [ 'blue_loop_crossing_count', 'blue_loop_start_age', 'blue_loop_end_age', 'instability_start_age', 'instability_end_age', 'calculated_blue_loop_duration', 'calculated_instability_duration', 'run_dir_path', 'analysis_status' # Keep status ] existing_summary_cols = [col for col in summary_columns_for_summary_output if col in summary_df.columns] summary_df = summary_df[existing_summary_cols].copy() summary_df.to_csv(summary_csv_path, na_rep='NaN') logger.info(f"Summary CSV written to {summary_csv_path}") # --- MODIFIED CROSS-GRID GENERATION LOGIC --- # Iterate through each unique Y value to create a separate cross-grid. # This resolves the "cannot reindex on an axis with duplicate labels" error. 
generated_cross_csv_paths = [] # Reset for reanalysis case if 'blue_loop_crossing_count' in summary_df_raw.columns: for y_val in unique_ys: logger.info(f"Generating cross-grid for initial_Y = {y_val:.3f}...") # Filter the summary data for the current Y value # We need to reset index first to filter on 'initial_Y' column, then set it back or unstack. # A simpler way is to use .xs() if 'initial_Y' is part of a MultiIndex, then unstack. # If summary_df_raw is already multi-indexed by (Z, Y, Mass): try: summary_filtered_by_y = summary_df_raw.xs(y_val, level='initial_Y', drop_level=False) # Now, create the cross-grid (Z vs Mass) for this specific Y # We need to drop the 'initial_Y' level after filtering, then unstack 'initial_mass' cross_data_matrix_for_y = summary_filtered_by_y['blue_loop_crossing_count'].droplevel('initial_Y').unstack(level='initial_mass') if not cross_data_matrix_for_y.empty: # Ensure columns and index are numeric for sorting cross_data_matrix_for_y.columns = pd.to_numeric(cross_data_matrix_for_y.columns, errors='coerce') cross_data_matrix_for_y.index = pd.to_numeric(cross_data_matrix_for_y.index, errors='coerce') # Sort the index and columns. The reindex on unique values is now safe # because we've filtered by Y, removing the source of duplicates on the Z axis. cross_data_matrix_for_y = cross_data_matrix_for_y.reindex( index=sorted(cross_data_matrix_for_y.index.unique()), columns=sorted(cross_data_matrix_for_y.columns.unique()) ) cross_data_matrix_for_y = cross_data_matrix_for_y.where(pd.notna(cross_data_matrix_for_y), np.nan) else: logger.warning(f"Cross-grid matrix is empty for initial_Y={y_val:.3f}. 
It might be due to no blue loop crossings for this Y or data structure issues.") # Still create an empty DataFrame with expected dimensions if no data cross_data_matrix_for_y = pd.DataFrame(np.nan, index=unique_zs, columns=unique_masses) # Construct a new filename including the Y value # cross_csv_path_for_y = f"{base_cross_csv_path}_Y{y_val:.3f}.csv" # Example: crossing_count_grid_Y0.256.csv cross_csv_path_for_y = f"{base_cross_csv_path}.csv" cross_data_matrix_for_y.to_csv(cross_csv_path_for_y, na_rep='NaN') generated_cross_csv_paths.append(cross_csv_path_for_y) logger.info(f"Cross-grid CSV for Y={y_val:.3f} written to {cross_csv_path_for_y}") except KeyError as ke: logger.warning(f"Skipping cross-grid generation for Y={y_val:.3f} due to missing key in summary data: {ke}") except Exception as e: logger.error(f"Critical error during cross-grid generation for Y={y_val:.3f}: {e}", exc_info=True) else: logger.warning("No 'blue_loop_crossing_count' in summary_df_raw. No cross-grid matrices will be generated.") # Combine all detailed data for plotting/RSP generation combined_detail_data_for_plotting = pd.DataFrame() if analyze_blue_loop: concise_detail_columns_for_saving = [ 'initial_mass', 'initial_Z', 'initial_Y', 'star_age', 'model_number', # NEW: Add initial_Y here 'log_Teff', 'log_L', 'log_g', 'profile_number', 'run_dir_path' ] for z_val, y_dict in grouped_detailed_dfs_for_analysis_raw.items(): # Iterate through Z and Y for y_val, dfs_list in y_dict.items(): if dfs_list: try: combined_df_bl = pd.concat(dfs_list, ignore_index=True) # Sort by Y, then Mass, then Age for consistent output combined_df_bl = combined_df_bl.sort_values(by=['initial_Y', 'initial_mass', 'star_age']).reset_index(drop=True) if blue_loop_output_type == 'all': df_to_save = combined_df_bl output_type_label = "all columns" else: existing_desired_cols = [col for col in concise_detail_columns_for_saving if col in combined_df_bl.columns] if not existing_desired_cols: logger.warning(f"No desired 
columns found for concise detail CSV for Z={z_val}, Y={y_val}. Skipping detail CSV write.") continue df_to_save = combined_df_bl[existing_desired_cols] output_type_label = "selected columns" # detail_filename = os.path.join(detail_files_output_dir, f"detail_z{z_val:.4f}_y{y_val:.3f}.csv") detail_filename = os.path.join(detail_files_output_dir, f"detail_z{z_val:.4f}.csv") df_to_save.to_csv(detail_filename, index=False, na_rep='NaN') logger.info(f"Written concatenated detail CSV for Z={z_val}, Y={y_val} with {output_type_label} to {detail_filename}") # Log with Y # Accumulate into combined_detail_data_for_plotting only if it hasn't been populated from disk already # This block is for building it during reanalysis if combined_detail_data_for_plotting.empty: combined_detail_data_for_plotting = df_to_save.copy() else: combined_detail_data_for_plotting = pd.concat([combined_detail_data_for_plotting, df_to_save], ignore_index=True) except Exception as e: logger.error(f"Error writing detail CSV for Z={z_val}, Y={y_val}: {e}", exc_info=True) # Log with Y else: logger.info(f"No detailed data to write for Z={z_val}, Y={y_val}.") # Log with Y else: logger.info("Skipping detailed blue loop CSV generation: Blue loop analysis is disabled.") # Logic for RSP inlists generation if args.rsp_workflow.run_rsp_workflow: logger.info("Generating MESA RSP inlists.") try: rsp_template_path = args.rsp_workflow.rsp_inlist_template_path rsp_output_dir = rsp_output_subdir try: generated_rsp_inlists_paths = generate_mesa_rsp_inlists( detail_df=combined_detail_data_for_plotting, # Use the combined df which now includes initial_Y mesa_output_base_dir=input_dir, # The root directory of your MESA runs rsp_inlist_template_path=rsp_template_path, rsp_output_subdir=rsp_output_dir ) except TypeError as e: logger.error(f"Error during MESA RSP inlist generation: {e}") # Return a value that indicates failure or exit the program return {"status": "error", "message": str(e)} # Check if the function returned 
an error status, and handle it gracefully if isinstance(generated_rsp_inlists_paths, dict) and generated_rsp_inlists_paths.get("status") == "error": return {"successful": False, "message": "RSP inlist generation failed."} if generated_rsp_inlists_paths: logger.info(f"Successfully generated {len(generated_rsp_inlists_paths)} MESA RSP inlist files.") else: logger.warning("No MESA RSP inlist files were generated.") except Exception as e: logger.error(f"Error during MESA RSP inlist generation: {e}", exc_info=True) else: logger.info("Skipping MESA RSP inlist generation: RSP workflow is disabled.") # --- START LOGIC FOR GYRE INPUT CSV GENERATION/LOADING --- if args.gyre_workflow.run_gyre_workflow: logger.info("GYRE workflow is enabled. Checking GYRE input CSV status.") gyre_input_df = pd.DataFrame() if gyre_input_csv_path is None: logger.warning("GYRE input CSV path not defined, skipping GYRE input CSV load/generation.") gyre_output_csv_path_returned = "" else: if os.path.exists(gyre_input_csv_path): try: gyre_input_df = pd.read_csv(gyre_input_csv_path) logger.info(f"Successfully loaded existing GYRE input CSV from {gyre_input_csv_path}") gyre_output_csv_path_returned = gyre_input_csv_path except Exception as e: logger.error(f"Failed to load existing GYRE input CSV from {gyre_input_csv_path}: {e}") logger.exception("GYRE input CSV loading exception details:") gyre_output_csv_path_returned = "" gyre_input_df = pd.DataFrame() else: logger.info(f"GYRE input CSV '{gyre_input_csv_name_from_config}' not found. 
Checking if generation is needed.") gyre_output_csv_path_returned = "" if gyre_input_df.empty: logger.info("Attempting to generate GYRE input CSV from MESA runs (independent path).") try: source_df_for_gyre = pd.DataFrame() if 'summary_df_raw' in locals() and not summary_df_raw.empty: # Reset index to make 'initial_Y' a column again for filtering source_df_for_gyre = summary_df_raw.reset_index().copy() logger.info("Using data from recently generated summary_df_raw for GYRE input.") elif not reanalysis_needed: logger.info("Scanning MESA runs to generate missing GYRE input CSV.") # These calls are now to the correctly imported functions mesa_run_infos = scan_mesa_runs(input_dir, inlist_name) if mesa_run_infos: temp_gyre_data = [] for run_info in mesa_run_infos: history_file_path = run_info['history_file_path'] try: df_full_history = get_data_from_history_file(history_file_path) if not df_full_history.empty: temp_gyre_data.append({ 'initial_mass': run_info['mass'], 'initial_Z': run_info['z'], 'initial_Y': run_info['y'], # NEW: Add initial_Y 'run_dir_path': run_info['run_dir_path'], 'first_model_number': df_full_history['model_number'].min(), 'last_model_number': df_full_history['model_number'].max(), 'first_age_yr': df_full_history['star_age'].min(), 'last_age_yr': df_full_history['star_age'].max() }) except Exception as e: logger.warning(f"Could not extract history data for GYRE input from {run_info['run_dir_path']}: {e}") if temp_gyre_data: source_df_for_gyre = pd.DataFrame(temp_gyre_data) logger.info(f"Successfully scanned {len(temp_gyre_data)} runs for GYRE input.") else: logger.warning("No runs found or data extracted for GYRE input from independent scan.") else: logger.info("No MESA runs found during independent scan for GYRE input.") else: pass # No source_df_for_gyre if reanalysis not needed and no summary_df_raw if not source_df_for_gyre.empty: if args.blue_loop_analysis.analyze_blue_loop: original_rows = len(source_df_for_gyre) if 'blue_loop_crossing_count' 
in source_df_for_gyre.columns: source_df_for_gyre = source_df_for_gyre[ (source_df_for_gyre['blue_loop_crossing_count'].notna()) & (source_df_for_gyre['blue_loop_crossing_count'] > 0) ].copy() if len(source_df_for_gyre) < original_rows: logger.info(f"Filtered GYRE input CSV: Removed {original_rows - len(source_df_for_gyre)} entries with no blue loop crossings (based on summary data).") else: logger.warning("Blue loop analysis is enabled, but 'blue_loop_crossing_count' not available in source data for GYRE input. Skipping blue loop filtering for GYRE input.") else: logger.info("Blue loop analysis is OFF. All successful MESA runs will be included in GYRE input CSV.") # Include 'initial_Y' in the columns for GYRE input gyre_input_cols = ['initial_mass', 'initial_Z', 'initial_Y', 'run_dir_path'] if 'first_model_number' in source_df_for_gyre.columns and 'last_model_number' in source_df_for_gyre.columns: gyre_input_cols.extend(['first_model_number', 'last_model_number']) gyre_input_df = source_df_for_gyre[gyre_input_cols].copy() else: logger.warning("'first_model_number' or 'last_model_number' not found in source data for GYRE. 
GYRE input CSV will only contain 'run_dir_path', 'initial_mass', 'initial_Z', 'initial_Y'.") gyre_input_df = source_df_for_gyre[[col for col in gyre_input_cols if col in source_df_for_gyre.columns]].copy() gyre_input_df['min_model_number'] = np.nan gyre_input_df['max_model_number'] = np.nan if not gyre_input_df.empty: gyre_input_df.rename(columns={ 'run_dir_path': 'mesa_run_directory', 'first_model_number': 'min_model_number', 'last_model_number': 'max_model_number' }, inplace=True) gyre_input_df['initial_Z'] = gyre_input_df['initial_Z'].apply(lambda x: f"{x:.4f}") gyre_input_df['initial_Y'] = gyre_input_df['initial_Y'].apply(lambda x: f"{x:.3f}") # Format initial_Y gyre_input_df.sort_values(['initial_Z', 'initial_Y', 'initial_mass'], inplace=True) # Sort by Y as well gyre_input_df.to_csv(gyre_input_csv_path, index=False, na_rep='NaN') logger.info(f"GYRE input CSV saved to: {gyre_input_csv_path}") gyre_output_csv_path_returned = gyre_input_csv_path else: logger.info("No data to write to GYRE input CSV after filtering or extraction.") gyre_output_csv_path_returned = "" else: logger.info("No source data available to generate GYRE input CSV.") gyre_output_csv_path_returned = "" except Exception as e: logger.error(f"Error generating GYRE input CSV '{gyre_input_csv_name_from_config}': {e}") logger.exception("GYRE input CSV generation exception details:") gyre_output_csv_path_returned = "" else: logger.info("Skipping GYRE input CSV generation: GYRE workflow is disabled in settings.") gyre_output_csv_path_returned = "" # Final sorting of combined_detail_data_for_plotting before returning if not combined_detail_data_for_plotting.empty: sort_cols = ['initial_Z', 'initial_mass', 'star_age'] if 'initial_Y' in combined_detail_data_for_plotting.columns: sort_cols.insert(1, 'initial_Y') # Insert Y after Z for sorting combined_detail_data_for_plotting = combined_detail_data_for_plotting.sort_values( by=sort_cols ).reset_index(drop=True) # Re-structure 
full_history_data_for_plotting to be a flat list of DataFrames for plotting, # ordered by ascending Z and, within each Z, by ascending Y. # The nested {Z: {Y: [DFs]}} dictionary itself is only read here, not modified. flattened_full_history_data_for_plotting = [] for z_val in sorted(full_history_data_for_plotting.keys()): for y_val in sorted(full_history_data_for_plotting[z_val].keys()): flattened_full_history_data_for_plotting.extend(full_history_data_for_plotting[z_val][y_val]) # NOTE: the third element returned below is the flattened list built above, not the nested dictionary. # Callers that expect the nested {Z: {Y: [DFs]}} structure need to be adjusted accordingly. # The returned tuple has six elements; the last one is generated_cross_csv_paths. return summary_df, combined_detail_data_for_plotting, flattened_full_history_data_for_plotting, gyre_output_csv_path_returned, generated_rsp_inlists_paths, generated_cross_csv_paths