Source code for mesalab.analyzis.data_reader

import pandas as pd
import numpy as np
import re
import os
import logging

logger = logging.getLogger(__name__)

[docs] def extract_params_from_inlist(inlist_path): """ Extract `initial_mass`, `initial_Z`, and `initial_Y` values from a MESA inlist file. Handles Fortran-style scientific notation (e.g., `1.0d-2`, `2.5D+1`) by converting it to Python-compatible form. Returns `None` for missing values, with warnings logged. Args: inlist_path (str): Absolute path to the MESA inlist file. Returns: tuple: A tuple (mass, z, y) where elements are floats or `None` if the corresponding parameter is not found. Example: Assuming 'my_run/inlist_project' contains: initial_mass = 1.0 initial_Z = 0.014 initial_Y = 0.28 >>> from mesalab.analyzis import data_reader >>> data_reader.extract_params_from_inlist("my_run/inlist_project") (1.0, 0.014, 0.28) """ mass = None z = None y = None try: with open(inlist_path, 'r') as f: content = f.read() # Regex for initial_mass: allows integers, floats, and scientific notation (e or d) mass_match = re.search(r'^\s*initial_mass\s*=\s*(\d+\.?\d*(?:[deDE][+\-]?\d+)?)\s*(?:!.*)?$', content, re.MULTILINE | re.IGNORECASE) if mass_match: mass = float(mass_match.group(1).replace('d', 'e').replace('D', 'E')) # Regex for initial_Z: allows integers, floats, and scientific notation (e or d) z_match = re.search(r'^\s*initial_Z\s*=\s*(\d+\.?\d*(?:[deDE][+\-]?\d+)?)\s*(?:!.*)?$', content, re.MULTILINE | re.IGNORECASE) if z_match: z = float(z_match.group(1).replace('d', 'e').replace('D', 'E')) # Regex for initial_Y: allows integers, floats, and scientific notation (e or d) y_match = re.search(r'^\s*initial_Y\s*=\s*(\d+\.?\d*(?:[deDE][+\-]?\d+)?)\s*(?:!.*)?$', content, re.MULTILINE | re.IGNORECASE) if y_match: y_value_str = y_match.group(1).replace('d', 'e').replace('D', 'E') y = float(y_value_str) else: logger.warning(f"Could not find 'initial_Y' in {inlist_path}. Setting to None for this run's parsing.") except FileNotFoundError: logger.error(f"Inlist file not found: {inlist_path}") except Exception as e: logger.error(f"Error reading inlist file {inlist_path}: {e}") # Log warnings if mass or Z parameters are not found if mass is None: logger.warning(f"Could not find 'initial_mass' in {inlist_path}.") if z is None: logger.warning(f"Could not find 'initial_Z' in {inlist_path}.") return mass, z, y
[docs] def scan_mesa_runs(input_dir, inlist_name): """ Scan a directory for MESA run subdirectories that contain both an inlist file and history.data. Each valid subdirectory must: - Be a direct (non-hidden) subdirectory of `input_dir` - Contain the specified `inlist_name` file - Contain a LOGS/history.data file The function attempts to extract `initial_mass`, `initial_Z`, and `initial_Y` from each inlist file. Only runs with both `mass` and `z` values present are included in the output. Args: input_dir (str): Absolute path to the main directory containing MESA run subdirectories. inlist_name (str): Name of the inlist file expected in each subdirectory (e.g., 'inlist'). Returns: list of dict: Each dictionary represents a valid MESA run and contains: - 'history_file_path' (str): Full path to the history.data file - 'run_dir_path' (str): Full path to the MESA run directory - 'mass' (float): Extracted initial_mass - 'z' (float): Extracted initial_Z - 'y' (float or None): Extracted initial_Y Returns an empty list if no valid runs are found. Example: Given a directory structure like:: /path/to/mesa_grid/ ├── run_M1.0_Z0.014_Y0.28 │ ├── inlist │ └── LOGS │ └── history.data └── run_M2.0_Z0.006_Y0.25 ├── inlist └── LOGS └── history.data >>> from mesalab.analyzis import data_reader >>> data_reader.scan_mesa_runs("/path/to/mesa_grid", "inlist") [ { 'history_file_path': '/path/to/mesa_grid/run_M1.0_Z0.014_Y0.28/LOGS/history.data', 'run_dir_path': '/path/to/mesa_grid/run_M1.0_Z0.014_Y0.28', 'mass': 1.0, 'z': 0.014, 'y': 0.28 }, { 'history_file_path': '/path/to/mesa_grid/run_M2.0_Z0.006_Y0.25/LOGS/history.data', 'run_dir_path': '/path/to/mesa_grid/run_M2.0_Z0.006_Y0.25', 'mass': 2.0, 'z': 0.006, 'y': 0.25 } ] """ mesa_run_infos = [] # Get all direct subdirectories, filtering out hidden ones (like .mesa_temp_cache) potential_run_dir_names = [d for d in os.listdir(input_dir) if os.path.isdir(os.path.join(input_dir, d)) and not d.startswith('.')] # Filter out hidden directories if not potential_run_dir_names: logger.warning(f"No non-hidden subdirectories found directly in '{input_dir}'. " "Ensure your MESA runs are in individual folders within this input directory.") return [] logger.info(f"Scanning '{input_dir}' for MESA run directories...") for run_dir_name in potential_run_dir_names: run_dir_path = os.path.join(input_dir, run_dir_name) inlist_path = os.path.join(run_dir_path, inlist_name) history_file_path = os.path.join(run_dir_path, 'LOGS', 'history.data') # Check for existence of both inlist and history.data if os.path.exists(inlist_path) and os.path.exists(history_file_path): mass, z, y = extract_params_from_inlist(inlist_path) if mass is not None and z is not None: mesa_run_infos.append({ 'history_file_path': history_file_path, 'run_dir_path': run_dir_path, 'mass': mass, 'z': z, 'y': y }) else: logger.warning(f"Could not extract mass/Z from inlist '{inlist_path}'. Skipping this run.") else: # Log reasons for skipping if not os.path.exists(inlist_path): logger.info(f"Inlist file '{inlist_name}' not found in '{run_dir_path}'. Skipping this directory.") if not os.path.exists(history_file_path): logger.info(f"history.data not found at '{history_file_path}'. Skipping this directory.") logger.info(f"Finished scanning. Found {len(mesa_run_infos)} valid MESA runs.") return mesa_run_infos
[docs] def get_data_from_history_file(history_file_path): """ Reads a MESA history.data file into a pandas DataFrame using NumPy's genfromtxt. This function handles the specific structure of MESA `history.data` files, which contain a few descriptive lines followed by column headers and numerical data. It attempts to parse all columns as numeric values and ensures that 'model_number' is an integer column, if present. Args: history_file_path (str): The absolute path to the MESA history.data file. Returns: pandas.DataFrame: A DataFrame containing the parsed history data. Returns an empty DataFrame if the file is missing or cannot be parsed. Exception: If there's an error loading or processing the file, it is logged and an empty DataFrame is returned instead. Example: Assuming a valid 'history.data' file exists at the given path: >>> from mesalab.analyzis import data_reader >>> df = data_reader.get_data_from_history_file('/path/to/some_mesa_run/LOGS/history.data') >>> if not df.empty: ... print(df.head()) ... print(f"Total models: {len(df)}") ... print(f"Columns available: {list(df.columns)}") ... else: ... print("Failed to load history.data or file was empty.") """ if not os.path.exists(history_file_path): logger.error(f"History file not found: {history_file_path}") return pd.DataFrame() try: data = np.genfromtxt(history_file_path, names=True, comments="#", skip_header=5, dtype=None, encoding='utf-8') if data.ndim == 0: df = pd.DataFrame([data.tolist()], columns=data.dtype.names) else: df = pd.DataFrame(data) for col in df.columns: df[col] = pd.to_numeric(df[col], errors='coerce') if 'model_number' in df.columns: df.dropna(subset=['model_number'], inplace=True) if not df['model_number'].isnull().any(): df['model_number'] = df['model_number'].astype(int) return df except Exception as e: logger.error(f"Error loading or processing {history_file_path} using np.genfromtxt: {e}") return pd.DataFrame()