Extracting expInfo variables
Today I wrote (with the help of ChatGPT) a Python script to extract all of the expInfo variables in Builder files across a set of folders. The script below looks in every folder in my Documents/Pavlovia folder and creates an Excel file containing the Key (expInfo variable name), Type (STRING, NUMBER or BOOLEAN), default value, List (if the value is a list of options), Folder (useful if you group your experiments) and Experiment (based on the folder name, not the experiment name).
import ast
import fnmatch
import html
import os
import pathlib
import re

import pandas as pd
# Root directory containing the experiment folders; edit this to match your setup
experiments_folder = pathlib.Path.home().joinpath("Documents", "Pavlovia")
# Regular expression capturing the value of the "Experiment info" parameter
pattern = r'<Param val="([^"]+)" valType="code" updates="None" name="Experiment info"/>'
# Collects one tuple per extracted expInfo variable
data = []
def is_number(value):
    """Report whether ``value`` can be converted with ``float()``.

    Returns True for numeric objects and for strings holding a numeric
    literal, and False when the conversion raises ValueError or TypeError.
    """
    try:
        float(value)
    except (ValueError, TypeError):
        return False
    else:
        return True
def _to_number(value):
    """Convert a value already accepted by ``is_number`` to ``int`` or ``float``.

    Values whose text form has no decimal point are tried as ``int`` first.
    If that fails (for example "1e5", "nan" or "inf"), ``float`` is used so
    that one unusual number cannot abort the rest of the dictionary.
    """
    if "." not in str(value):
        try:
            return int(value)
        except (ValueError, OverflowError):
            pass  # Not an integer literal; fall through to float
    return float(value)


def _classify_value(value):
    """Classify one expInfo default value.

    Returns a ``(value_type, value, options)`` tuple where ``value_type`` is
    "NUMBER", "BOOLEAN" or "STRING", ``value`` is the default to report and
    ``options`` is the text form of the option list ("" when the value is
    not a list).
    """
    options = ""
    # Detect lists inside strings
    looks_like_list = isinstance(value, str) and (
        (value.startswith("[") and value.endswith("]")) or "," in value
    )
    if looks_like_list:
        try:
            parsed = ast.literal_eval(value)
        except (SyntaxError, ValueError, TypeError):
            parsed = None  # Keep as a string if conversion fails
        # Only sequences are treated as option lists; anything else (dict,
        # set, ...) stays as the original string so the row remains hashable.
        if isinstance(parsed, (list, tuple)):
            value = parsed

    if isinstance(value, (list, tuple)):
        options = str(list(value))
        first_item = value[0] if value else None  # First item or None
        value_type = "NUMBER" if is_number(first_item) else "STRING"
        return value_type, first_item, options  # First item goes in Value
    # Booleans are tested before numbers because float(True) succeeds.
    if isinstance(value, bool) or value in ("True", "False"):
        return "BOOLEAN", value, options
    if is_number(value):  # Detect numbers stored as strings
        return "NUMBER", _to_number(value), options
    return "STRING", value, options


def _read_exp_info(filepath):
    """Return the raw "Experiment info" value found in ``filepath``, or None.

    None is returned when the file cannot be read (an error is printed) or
    when ``pattern`` does not match. Only the first match is used.
    """
    try:
        content = filepath.read_text(encoding="utf-8")
    except (OSError, ValueError) as e:  # UnicodeDecodeError is a ValueError
        print(f"❌ Error opening {filepath}: {e}")
        return None
    match = re.search(pattern, content)
    if match is None:
        return None
    # The captured attribute value may contain character entities such as
    # &quot;; decode them so the text can be parsed as a Python literal.
    return html.unescape(match.group(1))


# Walk every sub-folder and collect the expInfo variables of each experiment
for path, _dirs, files in os.walk(experiments_folder):
    path = pathlib.Path(path)  # Convert to Path object
    if len(path.parts) <= len(experiments_folder.parts):
        continue  # Ignore chosen folder
    experiment_name = path.name  # Folder name as experiment identifier
    folder_name = path.parent.name
    for filename in fnmatch.filter(files, "*.psyexp"):
        if fnmatch.fnmatch(filename, "*_legacy.psyexp"):
            continue  # Skip legacy copies
        filepath = path / filename
        raw_info = _read_exp_info(filepath)
        if raw_info is None:
            continue
        print(experiment_name)
        try:
            param_dict = ast.literal_eval(raw_info)
        except (SyntaxError, ValueError, TypeError) as e:
            print(f"❌ Error parsing dictionary in {filepath}: {e}")
            continue
        if not isinstance(param_dict, dict):
            print(f"❌ Error parsing dictionary in {filepath}: expected a dict, got {type(param_dict).__name__}")
            continue
        for key, value in param_dict.items():
            # "|req" and "|cfg" in the key mark required / config variables
            required = "|req" in key
            config = "|cfg" in key
            clean_key = key.replace("|req", "").replace("|cfg", "")  # Remove markers
            value_type, value, options = _classify_value(value)
            data.append((clean_key, value_type, required, config, value, options, folder_name, experiment_name))
# Build a table with one row per extracted variable
column_names = ["Key", "Type", "Required", "Config", "Value", "List", "Folder", "Experiment"]
df = pd.DataFrame(data, columns=column_names)
# Make sure the List column holds an empty string instead of missing values
df["List"] = df["List"].fillna("")
# Merge rows that differ only by experiment, gathering the experiment names into a list
group_keys = [name for name in column_names if name != "Experiment"]
experiments_per_variable = df.groupby(group_keys, dropna=False)["Experiment"].apply(list)
df_grouped = experiments_per_variable.reset_index()
# Write the summary workbook into the experiments folder
output_path = experiments_folder / "experiments_summary.xlsx"
df_grouped.to_excel(output_path, index=False)
print(f"✅ Data saved to {output_path}")
This probably doesn’t have much use if you only have a few experiments but I have quite a lot (as you can probably imagine) and today I wanted to check the ones created for a particular client to see if I had been using the same variable names and defaults across the different experiments.
Required is identified by a |req tag at the end of the key.
Config is identified by a |cfg tag at the end of the key, e.g. offWhite|cfg.
expInfo_variables.py (4.5 KB)