Source code for mesalab.analyzis.mesa_analyzer
import os
import pandas as pd
import numpy as np
import yaml
import logging
from tqdm import tqdm
import sys
# --- CORRECTED IMPORTS ---
# Import the actual functions from their correct respective modules
from mesalab.analyzis.data_reader import scan_mesa_runs, get_data_from_history_file
from mesalab.bluelooptools.blue_loop_analyzer import analyze_blue_loop_and_instability
from mesalab.rsptools import generate_mesa_rsp_inlists
from mesalab.io.output_manager import create_output_directories
from mesalab.io.inlist_parser import get_mesa_params_from_inlist # NEW: Import to get initial_y
logger = logging.getLogger(__name__)
[docs]
def perform_mesa_analysis(args, analysis_results_sub_dir, detail_files_output_dir,
gyre_input_csv_name: str = 'sorted_blue_loop_profiles.csv',
rsp_output_subdir: str = None):
"""
Coordinates the analysis of MESA runs, including blue loop analysis,
data aggregation, and saving summary and detailed results.
Args:
args (argparse.Namespace): Command-line arguments containing input_dir,
inlist_name, analyze_blue_loop, blue_loop_output_type,
force_reanalysis.
analysis_results_sub_dir (str): Path to the directory for summary/cross-grid CSVs.
detail_files_output_dir (str): Path to the directory for detailed blue loop CSVs.
gyre_input_csv_name (str): The desired filename for the CSV containing profiles
information for the GYRE workflow. Defaults to 'sorted_blue_loop_profiles.csv'.
rsp_output_subdir (str, optional): Base directory where MESA RSP inlists should be saved.
Defaults to None, in which case the RSP inlists will be
generated relative to the original MESA run directories.
Returns:
tuple: A tuple containing:
- pd.DataFrame: The main summary DataFrame of analysis results.
- pd.DataFrame: A combined DataFrame of detailed blue loop data for plotting
(combined_detail_data_for_plotting).
- dict: A dictionary where keys are metallicities (Z) and values are
lists of full, untrimmed history DataFrames for plotting (full_history_data_for_plotting).
- str: The full path to the generated GYRE input CSV file. Returns an empty string
if the CSV was not generated.
- list: A list of paths to the generated RSP inlist files.
"""
# Directory creation logic is now handled by output_manager.create_output_directories
# The `create_output_directories` function from `output_manager.py`
# should be called in `cli.py` *before* calling `perform_mesa_analysis`.
# This function `perform_mesa_analysis` then receives the *already created*
# paths.
skipped_runs_log_path = os.path.join(analysis_results_sub_dir, "skipped_runs_log.txt")
# Clear old skipped runs log if reanalysis is forced
if args.general_settings.force_reanalysis and os.path.exists(skipped_runs_log_path):
try:
os.remove(skipped_runs_log_path)
logger.info(f"Removed old skipped runs log: {skipped_runs_log_path}")
except OSError as e:
logger.warning(f"Could not remove old skipped runs log {skipped_runs_log_path}: {e}")
input_dir = args.general_settings.input_dir
inlist_name = args.general_settings.inlist_name
analyze_blue_loop = args.blue_loop_analysis.analyze_blue_loop
blue_loop_output_type = args.blue_loop_analysis.blue_loop_output_type
force_reanalysis = args.general_settings.force_reanalysis
# These paths should ideally come from output_manager.get_analysis_file_paths in cli.py
# or be passed into this function. For now, they are redefined here for a self-contained
# update, but consider refactoring if these are also managed by output_manager.
summary_csv_path = os.path.join(analysis_results_sub_dir, "summary_results.csv")
# cross_csv_path will now be generated inside the loop for each Y value,
# so we'll store the paths in a list if needed to return multiple.
# For now, we'll keep `cross_csv_path` as a base for generating the filenames.
base_cross_csv_path = os.path.join(analysis_results_sub_dir, "crossing_count_grid")
# Initialize gyre_input_csv_name_from_config
gyre_input_csv_name_from_config = None
if args.gyre_workflow.run_gyre_workflow:
gyre_input_csv_name_from_config = args.gyre_workflow.filtered_profiles_csv_name
if gyre_input_csv_name_from_config is None:
logger.warning("GYRE workflow enabled but 'filtered_profiles_csv_name' not specified in config. Using 'sorted_blue_loop_profiles.csv' as default.")
gyre_input_csv_name_from_config = "sorted_blue_loop_profiles.csv"
gyre_input_csv_path = None
if gyre_input_csv_name_from_config is not None:
gyre_input_csv_path = os.path.join(analysis_results_sub_dir, gyre_input_csv_name_from_config)
else:
logger.info("GYRE input CSV path not initialized as GYRE workflow is disabled or name is missing.")
# Reanalysis logic needs to be updated to account for multiple cross-grid files
# For simplicity, we'll assume if *any* cross-grid exists, and force_reanalysis is False,
# we try to load. A more robust solution might check all expected Y-specific cross-grids.
reanalysis_needed = force_reanalysis or \
not os.path.exists(summary_csv_path)
# We can't easily check for all potential cross_csv_paths here without knowing unique_ys beforehand.
# So, if reanalysis_needed is false based on summary_csv, we'll proceed, but cross-grids will be
# generated if they don't exist later.
detail_csvs_exist = True
if analyze_blue_loop:
if not os.path.exists(detail_files_output_dir) or not os.listdir(detail_files_output_dir):
detail_csvs_exist = False
if not force_reanalysis:
logger.info(f"Detailed blue loop CSVs not found in '{detail_files_output_dir}'. Forcing reanalysis to generate them for plotting.")
reanalysis_needed = True
logger.info(f"Analysis started. Full reanalysis needed: {reanalysis_needed}")
summary_df = pd.DataFrame()
combined_detail_data_for_plotting = pd.DataFrame()
full_history_data_for_plotting = {}
gyre_output_csv_path_returned = ""
generated_rsp_inlists_paths = []
generated_cross_csv_paths = [] # NEW: To store paths of generated cross-grids
if not reanalysis_needed:
logger.info("Summary and cross-grid CSV files already exist. Attempting to load existing data.")
try:
# Load summary_df
loaded_summary_df = pd.read_csv(summary_csv_path) # Load as regular DataFrame first
# Check for initial_Y in columns before setting index
index_cols = ['initial_Z', 'initial_mass']
if 'initial_Y' in loaded_summary_df.columns:
index_cols.insert(1, 'initial_Y') # Insert Y after Z
if all(col in loaded_summary_df.columns for col in index_cols):
loaded_summary_df.set_index(index_cols, inplace=True)
else:
logger.warning(f"Loaded summary CSV is missing one or more expected index columns ({index_cols}). Cannot set index properly. Reanalysis might be needed.")
reanalysis_needed = True # Force reanalysis if index columns are missing
if not reanalysis_needed: # Only proceed if index setting was successful or not needed
if analyze_blue_loop:
filtered_loaded_summary_df = loaded_summary_df[
(loaded_summary_df['blue_loop_crossing_count'].notna()) &
(loaded_summary_df['blue_loop_crossing_count'] > 0)
].copy()
else:
filtered_loaded_summary_df = loaded_summary_df.copy()
logger.info(f"Blue loop analysis is OFF. All entries from '{summary_csv_path}' will be considered for GYRE input.")
if filtered_loaded_summary_df.empty and not loaded_summary_df.empty:
logger.warning("Loaded summary CSV contained no valid blue loop entries after filtering. Forcing reanalysis if blue loop analysis is on.")
if analyze_blue_loop:
reanalysis_needed = True
else:
logger.info("No successful MESA runs found in loaded summary CSV. Cannot generate GYRE input.")
return pd.DataFrame(), pd.DataFrame(), {}, "", [], [] # Return empty list for cross_csv_paths
elif filtered_loaded_summary_df.empty and loaded_summary_df.empty:
logger.info("Loaded summary CSV was empty. No valid entries found.")
return pd.DataFrame(), pd.DataFrame(), {}, "", [], [] # Return empty list for cross_csv_paths
else:
summary_df = filtered_loaded_summary_df
logger.info("Successfully loaded and filtered existing summary CSV.")
if args.gyre_workflow.get('run_gyre_workflow', False) and gyre_input_csv_path:
if os.path.exists(gyre_input_csv_path):
try:
gyre_input_df_loaded = pd.read_csv(gyre_input_csv_path)
logger.info(f"Successfully loaded existing GYRE input CSV from {gyre_input_csv_path}")
gyre_output_csv_path_returned = gyre_input_csv_path
except Exception as e:
logger.error(f"Failed to load existing GYRE input CSV from {gyre_input_csv_path}: {e}")
logger.exception("GYRE input CSV loading exception details:")
gyre_output_csv_path_returned = ""
else:
logger.warning(f"GYRE workflow enabled, but existing GYRE input CSV '{gyre_input_csv_name_from_config}' not found at {gyre_input_csv_path}. It will not be generated unless a full reanalysis is triggered.")
gyre_output_csv_path_returned = ""
else:
logger.info("GYRE workflow is disabled or GYRE input CSV path not defined. Skipping loading of existing GYRE input CSV.")
gyre_output_csv_path_returned = ""
if not reanalysis_needed:
logger.info("Detail data not in memory; attempting to load from disk for plotting...")
combined_detail_dfs = []
if os.path.exists(detail_files_output_dir):
logger.info(f"Loading CSV files from '{detail_files_output_dir}'...")
for f_name in os.listdir(detail_files_output_dir):
if f_name.endswith(".csv"):
try:
df = pd.read_csv(os.path.join(detail_files_output_dir, f_name))
combined_detail_dfs.append(df)
except Exception as e:
logger.warning(f"Failed to load detail CSV '{f_name}': {e}")
if combined_detail_dfs:
combined_detail_data_for_plotting = pd.concat(combined_detail_dfs, ignore_index=True)
logger.info(f"Successfully loaded {len(combined_detail_dfs)} detail CSVs.")
# Ensure sorting by initial_Y as well if it's in the data
sort_cols = ['initial_Z', 'initial_mass', 'star_age']
if 'initial_Y' in combined_detail_data_for_plotting.columns:
sort_cols.insert(1, 'initial_Y') # Insert Y after Z
combined_detail_data_for_plotting = combined_detail_data_for_plotting.sort_values(
by=sort_cols
).reset_index(drop=True)
else:
logger.error(f"No CSV files loaded from '{detail_files_output_dir}'.")
else:
logger.error(f"Detail files output directory '{detail_files_output_dir}' does not exist.")
# Also try to load existing cross-grid CSVs.
# This will require scanning for files matching the new naming convention.
if os.path.exists(analysis_results_sub_dir):
for fname in os.listdir(analysis_results_sub_dir):
if fname.startswith("crossing_count_grid") and fname.endswith(".csv"):
generated_cross_csv_paths.append(os.path.join(analysis_results_sub_dir, fname))
if not generated_cross_csv_paths:
logger.warning("No existing cross-grid CSVs found. Full reanalysis might be needed to generate them.")
reanalysis_needed = True # If we didn't find them, force reanalysis to generate.
# This return needs to be inside the "if not reanalysis_needed" block to actually return
if not reanalysis_needed:
return summary_df, combined_detail_data_for_plotting, full_history_data_for_plotting, gyre_output_csv_path_returned, generated_rsp_inlists_paths, generated_cross_csv_paths # NEW: return list of cross_csv_paths
except FileNotFoundError:
logger.warning(f"Existing summary or cross-grid CSVs not found. Forcing full reanalysis.")
reanalysis_needed = True
except Exception as e:
logger.error(f"Error loading existing summary CSV: {e}. Forcing full reanalysis.")
logger.exception("Error details:")
reanalysis_needed = True
if reanalysis_needed:
logger.info("Starting full analysis of MESA runs...")
mesa_run_infos = scan_mesa_runs(input_dir, inlist_name)
if not mesa_run_infos:
logger.info("No MESA runs found for full analysis. Returning empty DataFrames.")
return pd.DataFrame(), pd.DataFrame(), {}, "", [], [] # Return empty list for cross_csv_paths
# Extract all unique parameters for index/columns
unique_masses = sorted(set(run['mass'] for run in mesa_run_infos))
unique_zs = sorted(set(run['z'] for run in mesa_run_infos))
unique_ys = sorted(set(run['y'] for run in mesa_run_infos)) # NEW: Get unique Y values
if not unique_masses or not unique_zs or not unique_ys: # NEW: Check Y as well
logger.error("Error: Could not determine unique masses, metallicities, or helium abundances from runs. Returning empty DataFrames.")
return pd.DataFrame(), pd.DataFrame(), {}, "", [], [] # Return empty list for cross_csv_paths
# We will generate `cross_data_matrix` per Y value later.
# cross_data_matrix = pd.DataFrame(np.nan, index=unique_zs, columns=unique_masses)
# cross_data_matrix.index.name = "Z"
# cross_data_matrix.columns.name = "Mass"
summary_data = []
# Group detailed data by Z and Y
grouped_detailed_dfs_for_analysis_raw = {z_val: {y_val: [] for y_val in unique_ys} for z_val in unique_zs}
full_history_data_for_plotting = {z_val: {y_val: [] for y_val in unique_ys} for z_val in unique_zs} # Nested dict for Z and Y
yaml_data = {}
for run_info in sorted(mesa_run_infos, key=lambda x: (x['z'], x['y'], x['mass'])): # Sort by Y too
z_key = f"Z_{run_info['z']:.4f}"
y_key = f"Y_{run_info['y']:.3f}" # NEW: Y key for YAML
if z_key not in yaml_data:
yaml_data[z_key] = {}
if y_key not in yaml_data[z_key]: # NEW: Nested Y under Z in YAML
yaml_data[z_key][y_key] = {}
mass_key = f"M_{run_info['mass']:.1f}"
yaml_data[z_key][y_key][mass_key] = { # Store under Z, Y, Mass
'run_directory': os.path.basename(run_info['run_dir_path']),
'history_file': os.path.basename(run_info['history_file_path']),
}
yaml_file_path = os.path.join(analysis_results_sub_dir, "processed_runs_overview.yaml")
try:
with open(yaml_file_path, 'w') as f:
yaml.dump(yaml_data, f, indent=4, sort_keys=False)
logger.info(f"Generated YAML overview of processed runs: {yaml_file_path}")
except Exception as e:
logger.error(f"Could not write YAML overview file: {e}")
if os.path.exists(skipped_runs_log_path):
os.remove(skipped_runs_log_path) # Clear previous log
total_runs_for_analysis = len(mesa_run_infos)
with tqdm(total=total_runs_for_analysis, desc="Performing MESA Run Analysis") as pbar:
for run_info in mesa_run_infos:
current_mass = run_info['mass']
current_z = run_info['z']
current_y = run_info['y']
history_file_path = run_info['history_file_path']
run_dir_path = run_info['run_dir_path']
analysis_result_summary = {
'initial_mass': current_mass,
'initial_Z': current_z,
'initial_Y': current_y,
'run_dir_path': run_dir_path,
'blue_loop_crossing_count': np.nan,
'min_log_L': np.nan, 'min_log_Teff': np.nan,'min_log_R': np.nan,
'max_log_L': np.nan, 'max_log_Teff': np.nan,'max_log_R': np.nan,
'first_model_number': np.nan, 'last_model_number': np.nan,
'first_age_yr': np.nan, 'last_age_yr': np.nan, 'blue_loop_start_age': np.nan,
'blue_loop_end_age': np.nan, 'instability_start_age': np.nan,
'instability_end_age': np.nan, 'calculated_blue_loop_duration': np.nan,
'calculated_instability_duration': np.nan,
'analysis_status': 'Success' # Default status
}
current_detail_df = pd.DataFrame()
try:
df_full_history = get_data_from_history_file(history_file_path)
df_full_history['initial_mass'] = current_mass
df_full_history['initial_Z'] = current_z
df_full_history['initial_Y'] = current_y
df_full_history['run_dir_path'] = run_dir_path
if current_z not in full_history_data_for_plotting:
full_history_data_for_plotting[current_z] = {} # Ensure nested dict
if current_y not in full_history_data_for_plotting[current_z]: # Ensure nested dict
full_history_data_for_plotting[current_z][current_y] = []
full_history_data_for_plotting[current_z][current_y].append(df_full_history.copy()) # Store by Z and Y
if analyze_blue_loop:
analyzer_output = analyze_blue_loop_and_instability(df_full_history, current_mass, current_z, current_y)
if not analyzer_output['blue_loop_detail_df'].empty:
bl_df = analyzer_output['blue_loop_detail_df'].copy()
bl_df['run_dir_path'] = run_dir_path # Ensure run_dir_path is also in bl_df for consistency
bl_df['initial_mass'] = current_mass # Add mass to detail df
bl_df['initial_Z'] = current_z # Add Z to detail df
bl_df['initial_Y'] = current_y # NEW: Add initial_Y to bl_df
if blue_loop_output_type == 'all':
current_detail_df = bl_df
else:
concise_detail_columns_local = [
'initial_mass', 'initial_Z', 'initial_Y', 'star_age', 'model_number',
'log_Teff', 'log_L', 'log_g', 'profile_number',
'run_dir_path'
]
existing_desired_cols = [col for col in concise_detail_columns_local if col in bl_df.columns]
if existing_desired_cols:
current_detail_df = bl_df[existing_desired_cols]
else:
logger.warning(f"No desired columns found for concise detail for M={current_mass}, Z={current_z}, Y={current_y}. Detail DF for plotting might remain empty.")
current_detail_df = pd.DataFrame()
else:
logger.info(f"analyzer_output['blue_loop_detail_df'] was empty for M={current_mass}, Z={current_z}, Y={current_y}. No detailed data for this run.")
if pd.notna(analyzer_output['crossing_count']):
analysis_result_summary['blue_loop_crossing_count'] = int(analyzer_output['crossing_count'])
if analysis_result_summary['blue_loop_crossing_count'] > 0:
state_times = analyzer_output['state_times']
# These metrics are calcualted in the blueloop analyzer, so we just copy them
analysis_result_summary['blue_loop_start_age'] = state_times.get('first_is_entry_age', np.nan)
analysis_result_summary['blue_loop_end_age'] = state_times.get('last_is_exit_age', np.nan)
analysis_result_summary['calculated_blue_loop_duration'] = analyzer_output.get('calculated_blue_loop_duration', np.nan)
analysis_result_summary['instability_start_age'] = state_times.get('instability_start_age', np.nan)
analysis_result_summary['instability_end_age'] = state_times.get('instability_end_age', np.nan)
analysis_result_summary['calculated_instability_duration'] = analyzer_output.get('calculated_instability_duration', np.nan)
if not current_detail_df.empty:
analysis_result_summary['min_log_L'] = current_detail_df['log_L'].min()
analysis_result_summary['min_log_Teff'] = current_detail_df['log_Teff'].min()
if 'log_R' in current_detail_df.columns:
analysis_result_summary['min_log_R'] = current_detail_df['log_R'].min()
elif 'log_R' in df_full_history.columns:
analysis_result_summary['min_log_R'] = df_full_history['log_R'].min()
analysis_result_summary['max_log_L'] = current_detail_df['log_L'].max()
analysis_result_summary['max_log_Teff'] = current_detail_df['log_Teff'].max()
if 'log_R' in current_detail_df.columns:
analysis_result_summary['max_log_R'] = current_detail_df['log_R'].max()
elif 'log_R' in df_full_history.columns:
analysis_result_summary['max_log_R'] = df_full_history['log_R'].max()
analysis_result_summary['first_model_number'] = current_detail_df['model_number'].min()
analysis_result_summary['last_model_number'] = current_detail_df['model_number'].max()
analysis_result_summary['first_age_yr'] = current_detail_df['star_age'].min()
analysis_result_summary['last_age_yr'] = current_detail_df['star_age'].max()
else:
logger.warning(f"current_detail_df is empty for M={current_mass}, Z={current_z}, Y={current_y} despite blue loop found (count > 0). Detailed summary metrics will be NaN.")
else:
logger.info(f"No blue loop found (0 crossings) for M={current_mass}, Z={current_z}, Y={current_y}. Blue loop summary metrics will be NaN.")
else:
logger.warning(f"Blue loop analysis failed for M={current_mass}, Z={current_z}, Y={current_y}. Blue loop summary metrics will be NaN.")
current_detail_df = pd.DataFrame()
else:
logger.info(f"Skipping blue loop analysis for M={current_mass}, Z={current_z}, Y={current_y} as analyze_blue_loop is False.") # Add Y to message
analysis_result_summary['blue_loop_crossing_count'] = np.nan
current_detail_df = pd.DataFrame()
summary_data.append(analysis_result_summary)
if analyze_blue_loop and not current_detail_df.empty:
# Ensure proper nested structure for grouped_detailed_dfs_for_analysis_raw
if current_z not in grouped_detailed_dfs_for_analysis_raw:
grouped_detailed_dfs_for_analysis_raw[current_z] = {}
if current_y not in grouped_detailed_dfs_for_analysis_raw[current_z]:
grouped_detailed_dfs_for_analysis_raw[current_z][current_y] = []
grouped_detailed_dfs_for_analysis_raw[current_z][current_y].append(current_detail_df)
# This part ensures that `combined_detail_data_for_plotting` is built correctly
# by concatenating all `current_detail_df` instances as they are processed.
if combined_detail_data_for_plotting.empty:
combined_detail_data_for_plotting = current_detail_df.copy()
else:
combined_detail_data_for_plotting = pd.concat([combined_detail_data_for_plotting, current_detail_df], ignore_index=True)
except Exception as err:
with open(skipped_runs_log_path, 'a') as log_file:
log_file.write(f"Skipped run {run_info['run_dir_path']} due to error: {err}\n")
logger.error(f"Skipped run {run_info['run_dir_path']} due to error: {err}")
logger.exception(f"Exception details for run {run_info['run_dir_path']}:")
# Append a summary entry even if there was an error, marking it as such
error_summary = {
'initial_mass': current_mass,
'initial_Z': current_z,
'initial_Y': current_y, # Ensure Y is recorded even on error
'run_dir_path': run_dir_path,
'blue_loop_crossing_count': np.nan,
'min_log_L': np.nan, 'min_log_Teff': np.nan, 'min_log_R': np.nan,
'max_log_L': np.nan, 'max_log_Teff': np.nan, 'max_log_R': np.nan,
'first_model_number': np.nan, 'last_model_number': np.nan,
'first_age_yr': np.nan, 'last_age_yr': np.nan,
'blue_loop_start_age': np.nan, 'blue_loop_end_age': np.nan,
'instability_start_age': np.nan, 'instability_end_age': np.nan,
'calculated_blue_loop_duration': np.nan,
'calculated_instability_duration': np.nan,
'analysis_status': f"Error: {str(err)[:100]}" # Truncate error message
}
summary_data.append(error_summary)
pbar.update(1)
summary_df_raw = pd.DataFrame(summary_data)
# Ensure 'initial_Y' is included in the sort and index for comprehensive results
summary_df_raw.sort_values(['initial_Z', 'initial_Y', 'initial_mass'], inplace=True)
summary_df_raw.set_index(['initial_Z', 'initial_Y', 'initial_mass'], inplace=True) # NEW: Set Y as part of the index
if analyze_blue_loop:
summary_df_to_save = summary_df_raw[
(summary_df_raw['blue_loop_crossing_count'].notna()) &
(summary_df_raw['blue_loop_crossing_count'] > 0)
].copy()
if summary_df_to_save.empty:
logger.info("No valid blue loop entries found after filtering for summary CSV.")
else:
logger.info(f"Generated summary CSV will contain {len(summary_df_to_save)} blue loop entries.")
else:
summary_df_to_save = summary_df_raw.copy()
logger.info(f"Blue loop analysis is OFF. Generated summary CSV will contain all {len(summary_df_to_save)} successful MESA runs.")
if summary_df_to_save.empty:
# Re-define columns including 'initial_Y'
summary_df = pd.DataFrame(columns=[
'initial_mass', 'initial_Z', 'initial_Y', 'run_dir_path', # Added initial_Y here
'blue_loop_crossing_count',
'blue_loop_start_age', 'blue_loop_end_age',
'instability_start_age', 'instability_end_age',
'calculated_blue_loop_duration', 'calculated_instability_duration',
'min_log_L', 'min_log_Teff', 'min_log_R',
'max_log_L', 'max_log_Teff', 'max_log_R',
'first_model_number', 'last_model_number',
'first_age_yr', 'last_age_yr', 'analysis_status' # Include analysis_status
], index=pd.MultiIndex.from_tuples([], names=['initial_Z', 'initial_Y', 'initial_mass']))
else:
summary_df = summary_df_to_save.copy()
if blue_loop_output_type == 'summary' and analyze_blue_loop:
logger.info("Applying 'summary' output type filtering for summary CSV columns.")
summary_columns_for_summary_output = [
'blue_loop_crossing_count',
'blue_loop_start_age', 'blue_loop_end_age',
'instability_start_age', 'instability_end_age',
'calculated_blue_loop_duration', 'calculated_instability_duration',
'run_dir_path',
'analysis_status' # Keep status
]
existing_summary_cols = [col for col in summary_columns_for_summary_output if col in summary_df.columns]
summary_df = summary_df[existing_summary_cols].copy()
summary_df.to_csv(summary_csv_path, na_rep='NaN')
logger.info(f"Summary CSV written to {summary_csv_path}")
# --- MODIFIED CROSS-GRID GENERATION LOGIC ---
# Iterate through each unique Y value to create a separate cross-grid.
# This resolves the "cannot reindex on an axis with duplicate labels" error.
generated_cross_csv_paths = [] # Reset for reanalysis case
if 'blue_loop_crossing_count' in summary_df_raw.columns:
for y_val in unique_ys:
logger.info(f"Generating cross-grid for initial_Y = {y_val:.3f}...")
# Filter the summary data for the current Y value
# We need to reset index first to filter on 'initial_Y' column, then set it back or unstack.
# A simpler way is to use .xs() if 'initial_Y' is part of a MultiIndex, then unstack.
# If summary_df_raw is already multi-indexed by (Z, Y, Mass):
try:
summary_filtered_by_y = summary_df_raw.xs(y_val, level='initial_Y', drop_level=False)
# Now, create the cross-grid (Z vs Mass) for this specific Y
# We need to drop the 'initial_Y' level after filtering, then unstack 'initial_mass'
cross_data_matrix_for_y = summary_filtered_by_y['blue_loop_crossing_count'].droplevel('initial_Y').unstack(level='initial_mass')
if not cross_data_matrix_for_y.empty:
# Ensure columns and index are numeric for sorting
cross_data_matrix_for_y.columns = pd.to_numeric(cross_data_matrix_for_y.columns, errors='coerce')
cross_data_matrix_for_y.index = pd.to_numeric(cross_data_matrix_for_y.index, errors='coerce')
# Sort the index and columns. The reindex on unique values is now safe
# because we've filtered by Y, removing the source of duplicates on the Z axis.
cross_data_matrix_for_y = cross_data_matrix_for_y.reindex(
index=sorted(cross_data_matrix_for_y.index.unique()),
columns=sorted(cross_data_matrix_for_y.columns.unique())
)
cross_data_matrix_for_y = cross_data_matrix_for_y.where(pd.notna(cross_data_matrix_for_y), np.nan)
else:
logger.warning(f"Cross-grid matrix is empty for initial_Y={y_val:.3f}. It might be due to no blue loop crossings for this Y or data structure issues.")
# Still create an empty DataFrame with expected dimensions if no data
cross_data_matrix_for_y = pd.DataFrame(np.nan, index=unique_zs, columns=unique_masses)
# Construct a new filename including the Y value
# cross_csv_path_for_y = f"{base_cross_csv_path}_Y{y_val:.3f}.csv" # Example: crossing_count_grid_Y0.256.csv
cross_csv_path_for_y = f"{base_cross_csv_path}.csv"
cross_data_matrix_for_y.to_csv(cross_csv_path_for_y, na_rep='NaN')
generated_cross_csv_paths.append(cross_csv_path_for_y)
logger.info(f"Cross-grid CSV for Y={y_val:.3f} written to {cross_csv_path_for_y}")
except KeyError as ke:
logger.warning(f"Skipping cross-grid generation for Y={y_val:.3f} due to missing key in summary data: {ke}")
except Exception as e:
logger.error(f"Critical error during cross-grid generation for Y={y_val:.3f}: {e}", exc_info=True)
else:
logger.warning("No 'blue_loop_crossing_count' in summary_df_raw. No cross-grid matrices will be generated.")
# Combine all detailed data for plotting/RSP generation
combined_detail_data_for_plotting = pd.DataFrame()
if analyze_blue_loop:
concise_detail_columns_for_saving = [
'initial_mass', 'initial_Z', 'initial_Y', 'star_age', 'model_number', # NEW: Add initial_Y here
'log_Teff', 'log_L', 'log_g', 'profile_number',
'run_dir_path'
]
for z_val, y_dict in grouped_detailed_dfs_for_analysis_raw.items(): # Iterate through Z and Y
for y_val, dfs_list in y_dict.items():
if dfs_list:
try:
combined_df_bl = pd.concat(dfs_list, ignore_index=True)
# Sort by Y, then Mass, then Age for consistent output
combined_df_bl = combined_df_bl.sort_values(by=['initial_Y', 'initial_mass', 'star_age']).reset_index(drop=True)
if blue_loop_output_type == 'all':
df_to_save = combined_df_bl
output_type_label = "all columns"
else:
existing_desired_cols = [col for col in concise_detail_columns_for_saving if col in combined_df_bl.columns]
if not existing_desired_cols:
logger.warning(f"No desired columns found for concise detail CSV for Z={z_val}, Y={y_val}. Skipping detail CSV write.")
continue
df_to_save = combined_df_bl[existing_desired_cols]
output_type_label = "selected columns"
# detail_filename = os.path.join(detail_files_output_dir, f"detail_z{z_val:.4f}_y{y_val:.3f}.csv")
detail_filename = os.path.join(detail_files_output_dir, f"detail_z{z_val:.4f}.csv")
df_to_save.to_csv(detail_filename, index=False, na_rep='NaN')
logger.info(f"Written concatenated detail CSV for Z={z_val}, Y={y_val} with {output_type_label} to {detail_filename}") # Log with Y
# Accumulate into combined_detail_data_for_plotting only if it hasn't been populated from disk already
# This block is for building it during reanalysis
if combined_detail_data_for_plotting.empty:
combined_detail_data_for_plotting = df_to_save.copy()
else:
combined_detail_data_for_plotting = pd.concat([combined_detail_data_for_plotting, df_to_save], ignore_index=True)
except Exception as e:
logger.error(f"Error writing detail CSV for Z={z_val}, Y={y_val}: {e}", exc_info=True) # Log with Y
else:
logger.info(f"No detailed data to write for Z={z_val}, Y={y_val}.") # Log with Y
else:
logger.info("Skipping detailed blue loop CSV generation: Blue loop analysis is disabled.")
# Logic for RSP inlists generation
if args.rsp_workflow.run_rsp_workflow:
logger.info("Generating MESA RSP inlists.")
try:
rsp_template_path = args.rsp_workflow.rsp_inlist_template_path
rsp_output_dir = rsp_output_subdir
try:
generated_rsp_inlists_paths = generate_mesa_rsp_inlists(
detail_df=combined_detail_data_for_plotting, # Use the combined df which now includes initial_Y
mesa_output_base_dir=input_dir, # The root directory of your MESA runs
rsp_inlist_template_path=rsp_template_path,
rsp_output_subdir=rsp_output_dir
)
except TypeError as e:
logger.error(f"Error during MESA RSP inlist generation: {e}")
# Return a value that indicates failure or exit the program
return {"status": "error", "message": str(e)}
# Check if the function returned an error status, and handle it gracefully
if isinstance(generated_rsp_inlists_paths, dict) and generated_rsp_inlists_paths.get("status") == "error":
return {"successful": False, "message": "RSP inlist generation failed."}
if generated_rsp_inlists_paths:
logger.info(f"Successfully generated {len(generated_rsp_inlists_paths)} MESA RSP inlist files.")
else:
logger.warning("No MESA RSP inlist files were generated.")
except Exception as e:
logger.error(f"Error during MESA RSP inlist generation: {e}", exc_info=True)
else:
logger.info("Skipping MESA RSP inlist generation: RSP workflow is disabled.")
# --- START LOGIC FOR GYRE INPUT CSV GENERATION/LOADING ---
if args.gyre_workflow.run_gyre_workflow:
logger.info("GYRE workflow is enabled. Checking GYRE input CSV status.")
gyre_input_df = pd.DataFrame()
if gyre_input_csv_path is None:
logger.warning("GYRE input CSV path not defined, skipping GYRE input CSV load/generation.")
gyre_output_csv_path_returned = ""
else:
if os.path.exists(gyre_input_csv_path):
try:
gyre_input_df = pd.read_csv(gyre_input_csv_path)
logger.info(f"Successfully loaded existing GYRE input CSV from {gyre_input_csv_path}")
gyre_output_csv_path_returned = gyre_input_csv_path
except Exception as e:
logger.error(f"Failed to load existing GYRE input CSV from {gyre_input_csv_path}: {e}")
logger.exception("GYRE input CSV loading exception details:")
gyre_output_csv_path_returned = ""
gyre_input_df = pd.DataFrame()
else:
logger.info(f"GYRE input CSV '{gyre_input_csv_name_from_config}' not found. Checking if generation is needed.")
gyre_output_csv_path_returned = ""
if gyre_input_df.empty:
logger.info("Attempting to generate GYRE input CSV from MESA runs (independent path).")
try:
source_df_for_gyre = pd.DataFrame()
if 'summary_df_raw' in locals() and not summary_df_raw.empty:
# Reset index to make 'initial_Y' a column again for filtering
source_df_for_gyre = summary_df_raw.reset_index().copy()
logger.info("Using data from recently generated summary_df_raw for GYRE input.")
elif not reanalysis_needed:
logger.info("Scanning MESA runs to generate missing GYRE input CSV.")
# These calls are now to the correctly imported functions
mesa_run_infos = scan_mesa_runs(input_dir, inlist_name)
if mesa_run_infos:
temp_gyre_data = []
for run_info in mesa_run_infos:
history_file_path = run_info['history_file_path']
try:
df_full_history = get_data_from_history_file(history_file_path)
if not df_full_history.empty:
temp_gyre_data.append({
'initial_mass': run_info['mass'],
'initial_Z': run_info['z'],
'initial_Y': run_info['y'], # NEW: Add initial_Y
'run_dir_path': run_info['run_dir_path'],
'first_model_number': df_full_history['model_number'].min(),
'last_model_number': df_full_history['model_number'].max(),
'first_age_yr': df_full_history['star_age'].min(),
'last_age_yr': df_full_history['star_age'].max()
})
except Exception as e:
logger.warning(f"Could not extract history data for GYRE input from {run_info['run_dir_path']}: {e}")
if temp_gyre_data:
source_df_for_gyre = pd.DataFrame(temp_gyre_data)
logger.info(f"Successfully scanned {len(temp_gyre_data)} runs for GYRE input.")
else:
logger.warning("No runs found or data extracted for GYRE input from independent scan.")
else:
logger.info("No MESA runs found during independent scan for GYRE input.")
else:
pass # No source_df_for_gyre if reanalysis not needed and no summary_df_raw
if not source_df_for_gyre.empty:
if args.blue_loop_analysis.analyze_blue_loop:
original_rows = len(source_df_for_gyre)
if 'blue_loop_crossing_count' in source_df_for_gyre.columns:
source_df_for_gyre = source_df_for_gyre[
(source_df_for_gyre['blue_loop_crossing_count'].notna()) &
(source_df_for_gyre['blue_loop_crossing_count'] > 0)
].copy()
if len(source_df_for_gyre) < original_rows:
logger.info(f"Filtered GYRE input CSV: Removed {original_rows - len(source_df_for_gyre)} entries with no blue loop crossings (based on summary data).")
else:
logger.warning("Blue loop analysis is enabled, but 'blue_loop_crossing_count' not available in source data for GYRE input. Skipping blue loop filtering for GYRE input.")
else:
logger.info("Blue loop analysis is OFF. All successful MESA runs will be included in GYRE input CSV.")
# Include 'initial_Y' in the columns for GYRE input
gyre_input_cols = ['initial_mass', 'initial_Z', 'initial_Y', 'run_dir_path']
if 'first_model_number' in source_df_for_gyre.columns and 'last_model_number' in source_df_for_gyre.columns:
gyre_input_cols.extend(['first_model_number', 'last_model_number'])
gyre_input_df = source_df_for_gyre[gyre_input_cols].copy()
else:
logger.warning("'first_model_number' or 'last_model_number' not found in source data for GYRE. GYRE input CSV will only contain 'run_dir_path', 'initial_mass', 'initial_Z', 'initial_Y'.")
gyre_input_df = source_df_for_gyre[[col for col in gyre_input_cols if col in source_df_for_gyre.columns]].copy()
gyre_input_df['min_model_number'] = np.nan
gyre_input_df['max_model_number'] = np.nan
if not gyre_input_df.empty:
gyre_input_df.rename(columns={
'run_dir_path': 'mesa_run_directory',
'first_model_number': 'min_model_number',
'last_model_number': 'max_model_number'
}, inplace=True)
gyre_input_df['initial_Z'] = gyre_input_df['initial_Z'].apply(lambda x: f"{x:.4f}")
gyre_input_df['initial_Y'] = gyre_input_df['initial_Y'].apply(lambda x: f"{x:.3f}") # Format initial_Y
gyre_input_df.sort_values(['initial_Z', 'initial_Y', 'initial_mass'], inplace=True) # Sort by Y as well
gyre_input_df.to_csv(gyre_input_csv_path, index=False, na_rep='NaN')
logger.info(f"GYRE input CSV saved to: {gyre_input_csv_path}")
gyre_output_csv_path_returned = gyre_input_csv_path
else:
logger.info("No data to write to GYRE input CSV after filtering or extraction.")
gyre_output_csv_path_returned = ""
else:
logger.info("No source data available to generate GYRE input CSV.")
gyre_output_csv_path_returned = ""
except Exception as e:
logger.error(f"Error generating GYRE input CSV '{gyre_input_csv_name_from_config}': {e}")
logger.exception("GYRE input CSV generation exception details:")
gyre_output_csv_path_returned = ""
else:
logger.info("Skipping GYRE input CSV generation: GYRE workflow is disabled in settings.")
gyre_output_csv_path_returned = ""
# Final sorting of combined_detail_data_for_plotting before returning
if not combined_detail_data_for_plotting.empty:
sort_cols = ['initial_Z', 'initial_mass', 'star_age']
if 'initial_Y' in combined_detail_data_for_plotting.columns:
sort_cols.insert(1, 'initial_Y') # Insert Y after Z for sorting
combined_detail_data_for_plotting = combined_detail_data_for_plotting.sort_values(
by=sort_cols
).reset_index(drop=True)
# Re-structure full_history_data_for_plotting to be a flat list of DataFrames for plotting,
# or keep it nested as {Z: {Y: [DFs]}} if that structure is preferred by consumers.
# For now, it's kept nested, but consider flattening it if the plotting functions expect a single list.
flattened_full_history_data_for_plotting = []
for z_val in sorted(full_history_data_for_plotting.keys()):
for y_val in sorted(full_history_data_for_plotting[z_val].keys()):
flattened_full_history_data_for_plotting.extend(full_history_data_for_plotting[z_val][y_val])
# The return signature dictates a dict, so ensure it matches.
# If the consumer expects a flat list, this might need adjustment in the calling code.
# For now, we'll return the nested dictionary structure as built.
return summary_df, combined_detail_data_for_plotting, flattened_full_history_data_for_plotting, gyre_output_csv_path_returned, generated_rsp_inlists_paths, generated_cross_csv_paths