Wakefield's Daily Tips

Extracting expInfo variables

Today I wrote (with the help of ChatGPT) a Python script to extract all of the expInfo variables in Builder files across a set of folders. The script below looks in every folder in my Documents/Pavlovia folder and creates an Excel file containing the Key (expInfo variable name), Type (STRING, NUMBER or BOOLEAN), Required and Config flags, default Value, List (if the value is a list of options), Folder (in case you group your experiments) and Experiment (based on the folder name, not the experiment name).

import ast
import fnmatch
import html
import os
import pathlib
import re

import pandas as pd

# Root directory that is searched for experiments; edit this to match your setup
experiments_folder = pathlib.Path.home().joinpath("Documents", "Pavlovia")

# Matches the "Experiment info" parameter element and captures its val attribute
pattern = (
    r'<Param val="([^"]+)" valType="code" '
    r'updates="None" name="Experiment info"/>'
)

# Accumulates one tuple per extracted expInfo key
data = list()

def is_number(value):
    """Return True when ``float(value)`` succeeds, False otherwise.

    Numeric strings such as "3.5" count as numbers. Values that make
    ``float`` raise ValueError or TypeError (e.g. "abc", None, a list)
    do not.
    """
    try:
        float(value)
    except (TypeError, ValueError):
        return False
    else:
        return True

def _classify_scalar(value):
    """Return the Type label ("BOOLEAN", "NUMBER" or "STRING") for one value."""
    # bool is tested first: it is a subclass of int, so float(True) succeeds
    # and a boolean would otherwise be reported as a NUMBER.
    if isinstance(value, bool) or value in ("True", "False"):
        return "BOOLEAN"
    if is_number(value):
        return "NUMBER"
    return "STRING"


def _to_number(value):
    """Convert a value accepted by is_number() to int when possible, else float."""
    if isinstance(value, (int, float)):
        return value
    try:
        return int(value)
    except (TypeError, ValueError):
        # e.g. "3.0" or "1e5": valid for float() but rejected by int()
        return float(value)


def _describe_value(value):
    """Return ``(value_type, default_value, options)`` for one expInfo entry.

    ``options`` is the string form of the list of choices, or "" when the
    entry is not a list. For a list, the default value is its first item
    (None for an empty list) and the type is taken from that item.
    """
    # Detect lists stored inside strings, e.g. "[1, 2]" or "1, 2"
    looks_like_list = isinstance(value, str) and (
        (value.startswith("[") and value.endswith("]")) or "," in value
    )
    if looks_like_list:
        try:
            parsed = ast.literal_eval(value)
        except (SyntaxError, ValueError, TypeError):
            parsed = None  # Keep as a string if conversion fails
        # Only accept real sequences; a dict or set would be unhashable
        # and cannot be used as a grouping key later on.
        if isinstance(parsed, (list, tuple)):
            value = parsed

    if isinstance(value, (list, tuple)):
        first_item = value[0] if value else None
        return _classify_scalar(first_item), first_item, str(list(value))

    value_type = _classify_scalar(value)
    if value_type == "NUMBER":
        value = _to_number(value)
    return value_type, value, ""


# Walk every sub-folder of experiments_folder, read the "Experiment info"
# dictionary from each .psyexp file and append one row per key to `data`.
for path, dirs, files in os.walk(experiments_folder):
    path = pathlib.Path(path)  # Convert to Path object
    if len(path.parts) <= len(experiments_folder.parts):
        continue  # Ignore files directly inside the chosen folder

    experiment_name = path.name  # Folder name as experiment identifier
    folder_name = path.parent.name

    for filename in fnmatch.filter(files, "*.psyexp"):
        if fnmatch.fnmatch(filename, "*_legacy.psyexp"):
            continue  # Skip legacy copies

        filepath = path / filename
        try:
            content = filepath.read_text(encoding="utf-8")
        except (OSError, ValueError) as e:
            # ValueError covers UnicodeDecodeError. Skip the file so that
            # content from a previous file is never reused.
            print(f"❌ Error opening {filepath}: {e}")
            continue

        matches = re.findall(pattern, content)
        if not matches:
            continue

        print(experiment_name)
        # Assume one match per file. The attribute value is XML-escaped, so
        # decode every entity (&quot;, &amp;, &lt;, &#10;, ...), not just quotes.
        clean_param_value = html.unescape(matches[0])

        try:
            param_dict = ast.literal_eval(clean_param_value)
        except (SyntaxError, ValueError, TypeError) as e:
            print(f"❌ Error parsing dictionary in {filepath}: {e}")
            continue
        if not isinstance(param_dict, dict):
            print(
                f"❌ Error parsing dictionary in {filepath}: "
                f"expected a dict, got {type(param_dict).__name__}"
            )
            continue

        for key, value in param_dict.items():
            key = str(key)  # Keys are normally strings; tolerate other literals
            required = "|req" in key
            config = "|cfg" in key
            clean_key = key.replace("|req", "").replace("|cfg", "")  # Remove markers

            value_type, value, options = _describe_value(value)
            data.append(
                (clean_key, value_type, required, config, value, options, folder_name, experiment_name)
            )

# Build a table with one row per (key, experiment) pair
columns = ["Key", "Type", "Required", "Config", "Value", "List", "Folder", "Experiment"]
df = pd.DataFrame(data, columns=columns)

# Make sure the List column never contains missing values
df["List"] = df["List"].fillna("")

# Collapse identical rows, collecting the experiments that share them into a list
group_columns = [name for name in columns if name != "Experiment"]
df_grouped = (
    df.groupby(group_columns, dropna=False)["Experiment"]
    .apply(list)
    .reset_index()
)

# Write the summary next to the experiments
output_path = experiments_folder / "experiments_summary.xlsx"
df_grouped.to_excel(output_path, index=False)

print(f"✅ Data saved to {output_path}")

This probably doesn’t have much use if you only have a few experiments but I have quite a lot (as you can probably imagine) and today I wanted to check the ones created for a particular client to see if I had been using the same variable names and defaults across the different experiments.

Required is identified by a |req tag at the end of the key.
Config is identified by a |cfg tag at the end of the key, e.g. offWhite|cfg.

expInfo_variables.py (4.5 KB)