import pandas as pd
import numpy as np
import re
import os
import logging
logger = logging.getLogger(__name__)
# [docs]  (Sphinx source-link marker left over from HTML extraction; not code)
def scan_mesa_runs(input_dir, inlist_name):
    """
    Scan a directory for MESA run subdirectories that contain both an inlist file and history.data.

    Each valid subdirectory must:

    - Be a direct (non-hidden) subdirectory of `input_dir`
    - Contain the specified `inlist_name` file
    - Contain a LOGS/history.data file

    The function calls `extract_params_from_inlist` on each inlist file to obtain
    the (`mass`, `z`, `y`) values (`initial_mass`, `initial_Z`, `initial_Y`).
    Only runs with both `mass` and `z` values present are included in the output.
    Subdirectories are visited in sorted name order, so the result order is
    deterministic across platforms and file systems.

    Args:
        input_dir (str): Absolute path to the main directory containing MESA run subdirectories.
        inlist_name (str): Name of the inlist file expected in each subdirectory (e.g., 'inlist').

    Returns:
        list of dict: Each dictionary represents a valid MESA run and contains:

            - 'history_file_path' (str): Full path to the history.data file
            - 'run_dir_path' (str): Full path to the MESA run directory
            - 'mass' (float): Extracted initial_mass
            - 'z' (float): Extracted initial_Z
            - 'y' (float or None): Extracted initial_Y

        Returns an empty list if no valid runs are found, or if `input_dir`
        cannot be listed (missing, not a directory, or not readable); the
        latter case is logged as an error.

    Example:
        Given a directory structure like::

            /path/to/mesa_grid/
            ├── run_M1.0_Z0.014_Y0.28
            │   ├── inlist
            │   └── LOGS
            │       └── history.data
            └── run_M2.0_Z0.006_Y0.25
                ├── inlist
                └── LOGS
                    └── history.data

        >>> from mesalab.analyzis import data_reader
        >>> data_reader.scan_mesa_runs("/path/to/mesa_grid", "inlist")
        [
            {
                'history_file_path': '/path/to/mesa_grid/run_M1.0_Z0.014_Y0.28/LOGS/history.data',
                'run_dir_path': '/path/to/mesa_grid/run_M1.0_Z0.014_Y0.28',
                'mass': 1.0,
                'z': 0.014,
                'y': 0.28
            },
            {
                'history_file_path': '/path/to/mesa_grid/run_M2.0_Z0.006_Y0.25/LOGS/history.data',
                'run_dir_path': '/path/to/mesa_grid/run_M2.0_Z0.006_Y0.25',
                'mass': 2.0,
                'z': 0.006,
                'y': 0.25
            }
        ]
    """
    # A missing or unreadable input directory is reported the same way as
    # "nothing found" instead of letting os.listdir's OSError escape.
    try:
        dir_entries = os.listdir(input_dir)
    except OSError as e:
        logger.error(f"Cannot list input directory '{input_dir}': {e}")
        return []

    # Direct, non-hidden subdirectories only (skips e.g. .mesa_temp_cache).
    # Sorted because os.listdir returns entries in arbitrary order.
    potential_run_dir_names = sorted(
        d for d in dir_entries
        if not d.startswith('.') and os.path.isdir(os.path.join(input_dir, d))
    )

    if not potential_run_dir_names:
        logger.warning(f"No non-hidden subdirectories found directly in '{input_dir}'. "
                       "Ensure your MESA runs are in individual folders within this input directory.")
        return []

    logger.info(f"Scanning '{input_dir}' for MESA run directories...")

    mesa_run_infos = []
    for run_dir_name in potential_run_dir_names:
        run_dir_path = os.path.join(input_dir, run_dir_name)
        inlist_path = os.path.join(run_dir_path, inlist_name)
        history_file_path = os.path.join(run_dir_path, 'LOGS', 'history.data')

        # Evaluate each existence check once so the decision and the logged
        # reason cannot disagree.
        has_inlist = os.path.exists(inlist_path)
        has_history = os.path.exists(history_file_path)

        if not (has_inlist and has_history):
            # Log every reason for skipping this directory.
            if not has_inlist:
                logger.info(f"Inlist file '{inlist_name}' not found in '{run_dir_path}'. Skipping this directory.")
            if not has_history:
                logger.info(f"history.data not found at '{history_file_path}'. Skipping this directory.")
            continue

        mass, z, y = extract_params_from_inlist(inlist_path)
        if mass is None or z is None:
            logger.warning(f"Could not extract mass/Z from inlist '{inlist_path}'. Skipping this run.")
            continue

        mesa_run_infos.append({
            'history_file_path': history_file_path,
            'run_dir_path': run_dir_path,
            'mass': mass,
            'z': z,
            'y': y,
        })

    logger.info(f"Finished scanning. Found {len(mesa_run_infos)} valid MESA runs.")
    return mesa_run_infos
# [docs]  (Sphinx source-link marker left over from HTML extraction; not code)
def get_data_from_history_file(history_file_path):
    """
    Load a MESA history.data file into a pandas DataFrame via NumPy's genfromtxt.

    The first five lines of the file are skipped and the column names are taken
    from the first line after them; the remaining lines are parsed as data.
    Every column is then coerced to a numeric dtype (values that cannot be
    converted become NaN). If a 'model_number' column is present, rows where it
    is NaN are dropped and the column is cast to int.

    Args:
        history_file_path (str): The absolute path to the MESA history.data file.

    Returns:
        pandas.DataFrame: A DataFrame containing the parsed history data.
        An empty DataFrame is returned if the file does not exist or if any
        error occurs while loading or processing it; in both cases the
        problem is logged as an error rather than raised.

    Example:
        Assuming a valid 'history.data' file exists at the given path:

        >>> from mesalab.analyzis import data_reader
        >>> df = data_reader.get_data_from_history_file('/path/to/some_mesa_run/LOGS/history.data')
        >>> if not df.empty:
        ...     print(df.head())
        ...     print(f"Total models: {len(df)}")
        ...     print(f"Columns available: {list(df.columns)}")
        ... else:
        ...     print("Failed to load history.data or file was empty.")
    """
    if not os.path.exists(history_file_path):
        logger.error(f"History file not found: {history_file_path}")
        return pd.DataFrame()

    try:
        records = np.genfromtxt(
            history_file_path,
            names=True,
            comments="#",
            skip_header=5,
            dtype=None,
            encoding='utf-8',
        )

        if records.ndim != 0:
            history_df = pd.DataFrame(records)
        else:
            # A 0-d result is a single record (one data row); wrap it in a
            # list so it becomes a one-row frame.
            history_df = pd.DataFrame([records.tolist()], columns=records.dtype.names)

        # Force every column to numeric; unparseable entries turn into NaN.
        for column_name in history_df.columns:
            history_df[column_name] = pd.to_numeric(history_df[column_name], errors='coerce')

        if 'model_number' in history_df.columns:
            history_df.dropna(subset=['model_number'], inplace=True)
            model_numbers = history_df['model_number']
            if model_numbers.notna().all():
                history_df['model_number'] = model_numbers.astype(int)
    except Exception as e:
        logger.error(f"Error loading or processing {history_file_path} using np.genfromtxt: {e}")
        return pd.DataFrame()

    return history_df