算法内部¶

访问和分析进化算法的内部状态。

概述¶

EvoToolkit 的低级 API 提供对算法内部的完全访问，允许您：

- 检查进化历史
- 访问解种群
- 提取指标和统计数据
- 绘制进化进程

访问运行状态¶

所有算法将其内部状态存储在 run_state_dict 中：

from evotoolkit.evo_method.evoengineer import EvoEngineer, EvoEngineerConfig

# NOTE(review): `config` is not defined in this snippet; presumably it is an
# EvoEngineerConfig instance built beforehand — confirm against the
# configuration documentation.
algorithm = EvoEngineer(config)
algorithm.run()

# Access the run state
run_state = algorithm.run_state_dict

# Full history of solutions
all_solutions = run_state.sol_history

# Current population
current_population = run_state.population

检查进化历史¶

获取所有解¶

# Every solution generated so far (invalid ones included)
all_solutions = algorithm.run_state_dict.sol_history

print(f"总共生成的解: {len(all_solutions)}")

过滤有效解¶

# Keep only the solutions whose evaluation was marked valid
valid_solutions = [
    sol for sol in all_solutions
    if sol.evaluation_res.valid
]

# Guard the ratio: an empty history would otherwise raise ZeroDivisionError
success_rate = (
    len(valid_solutions) / len(all_solutions) * 100 if all_solutions else 0.0
)

print(f"有效解: {len(valid_solutions)}")
print(f"成功率: {success_rate:.1f}%")

获取分数历史¶

# Scores of the valid solutions, in generation order (higher is better)
score_history = [
    sol.evaluation_res.score
    for sol in all_solutions
    if sol.evaluation_res.valid
]

# With no valid solution, max() would raise ValueError, the mean would divide
# by zero and score_history[0] would raise IndexError — report that instead.
if score_history:
    best_score = max(score_history)
    print(f"最佳分数: {best_score}")
    print(f"平均分数: {sum(score_history) / len(score_history):.4f}")
    # Improvement is measured against the first valid score in the history
    print(f"分数提升: {best_score - score_history[0]:.4f}")
else:
    print("没有有效解，无法计算分数统计")

获取最佳解¶

# Option 1: use the algorithm's built-in helper
best_solution = algorithm._get_best_sol(algorithm.run_state_dict.sol_history)


# Option 2: search manually, ranking every invalid solution below all valid ones
def _ranking_key(candidate):
    """Return the score of a valid solution, or -inf for an invalid one."""
    result = candidate.evaluation_res
    if result.valid:
        return result.score
    return float('-inf')


best_solution = max(all_solutions, key=_ranking_key)

print(f"最佳分数: {best_solution.evaluation_res.score}")
print(f"最佳代码:\n{best_solution.sol_string}")

解对象结构¶

每个解包含详细信息：

solution = all_solutions[0]

# Core attributes
solution.sol_string          # the actual code / solution string
solution.evaluation_res      # evaluation result object
solution.other_info         # dict of additional metadata

# Evaluation result
eval_res = solution.evaluation_res
eval_res.valid              # bool: is the solution valid?
eval_res.score              # float: fitness score (higher is better)
eval_res.error_message      # str: error message when the solution is invalid
eval_res.metadata           # dict: additional evaluation information

# Example: print details for the first five solutions
# NOTE(review): the `:.4f` format below assumes `score` is numeric even for
# invalid solutions — confirm; a None score would raise TypeError here.
for i, sol in enumerate(all_solutions[:5]):
    print(f"\n解 {i+1}:")
    print(f"  有效: {sol.evaluation_res.valid}")
    print(f"  分数: {sol.evaluation_res.score:.4f}")
    if not sol.evaluation_res.valid:
        print(f"  错误: {sol.evaluation_res.error_message}")

绘制进化进程¶

分数随时间变化¶

import matplotlib.pyplot as plt

# Scores of the valid solutions, in the order they were generated
scores = [s.evaluation_res.score for s in all_solutions if s.evaluation_res.valid]

# Same figure as before, drawn through the explicit Figure/Axes objects
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(scores, marker='o', alpha=0.6, linewidth=1, markersize=4)
ax.set_xlabel('解索引', fontsize=12)
ax.set_ylabel('分数（越高越好）', fontsize=12)
ax.set_title('进化进程', fontsize=14)
ax.grid(True, alpha=0.3)
fig.tight_layout()
fig.savefig('./results/evolution_progress.png', dpi=300)
plt.show()

按代数的最佳分数¶

import matplotlib.pyplot as plt
import numpy as np

# Bucket the scores of valid solutions by generation; a solution without a
# recorded 'generation' entry falls into bucket 0.
generations = {}
for sol in all_solutions:
    if not sol.evaluation_res.valid:
        continue
    gen = sol.other_info.get('generation', 0)
    generations.setdefault(gen, []).append(sol.evaluation_res.score)

# Best and mean score per generation, in ascending generation order
gen_numbers = sorted(generations)
best_scores = [max(generations[gen]) for gen in gen_numbers]
avg_scores = [np.mean(generations[gen]) for gen in gen_numbers]

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(gen_numbers, best_scores, 'g-o', label='最佳分数', linewidth=2)
ax.plot(gen_numbers, avg_scores, 'b--s', label='平均分数', linewidth=2)
ax.set_xlabel('代数', fontsize=12)
ax.set_ylabel('分数', fontsize=12)
ax.set_title('按代数的分数', fontsize=14)
ax.legend(fontsize=11)
ax.grid(True, alpha=0.3)
fig.tight_layout()
fig.savefig('./results/score_by_generation.png', dpi=300)
plt.show()

成功率分析¶

from collections import Counter

import matplotlib.pyplot as plt

# Count all solutions (valid or not) per generation in a single pass.
# The default of 0 matches the default used when `generations` was built, so
# solutions without a 'generation' entry are counted in the same bucket as
# their valid scores instead of producing a total of 0.
totals_by_generation = Counter(
    s.other_info.get('generation', 0) for s in all_solutions
)

# Success rate (percentage of valid solutions) for each generation
success_rates = []
for gen in gen_numbers:
    total = totals_by_generation[gen]
    valid = len(generations.get(gen, []))
    success_rates.append(valid / total * 100 if total > 0 else 0)

plt.figure(figsize=(10, 6))
plt.bar(gen_numbers, success_rates, alpha=0.7, color='steelblue')
plt.xlabel('代数', fontsize=12)
plt.ylabel('成功率 (%)', fontsize=12)
plt.title('按代数的解有效性', fontsize=14)
plt.ylim(0, 100)
plt.grid(True, alpha=0.3, axis='y')
plt.tight_layout()
plt.savefig('./results/success_rate.png', dpi=300)
plt.show()

分析解的多样性¶

代码长度分布¶

import matplotlib.pyplot as plt

# Character count of each valid solution's code
code_lengths = [len(s.sol_string) for s in all_solutions if s.evaluation_res.valid]

# Histogram drawn through the explicit Figure/Axes objects
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(code_lengths, bins=20, alpha=0.7, color='coral', edgecolor='black')
ax.set_xlabel('代码长度（字符）', fontsize=12)
ax.set_ylabel('频率', fontsize=12)
ax.set_title('解代码长度分布', fontsize=14)
ax.grid(True, alpha=0.3, axis='y')
fig.tight_layout()
fig.savefig('./results/code_length_dist.png', dpi=300)
plt.show()

分数分布¶

import matplotlib.pyplot as plt
import numpy as np  # needed for np.mean below; this snippet must run on its own

# Scores of the valid solutions
scores = [
    sol.evaluation_res.score
    for sol in all_solutions
    if sol.evaluation_res.valid
]

plt.figure(figsize=(10, 6))
plt.hist(scores, bins=30, alpha=0.7, color='lightgreen', edgecolor='black')
plt.xlabel('分数', fontsize=12)
plt.ylabel('频率', fontsize=12)
plt.title('分数分布', fontsize=14)

# Mark the best and mean score. Skipped when there are no valid scores,
# because max() of an empty list raises ValueError.
if scores:
    best_score = max(scores)
    mean_score = np.mean(scores)
    plt.axvline(best_score, color='r', linestyle='--', linewidth=2, label=f'最佳: {best_score:.4f}')
    plt.axvline(mean_score, color='b', linestyle='--', linewidth=2, label=f'平均: {mean_score:.4f}')
    plt.legend(fontsize=11)

plt.grid(True, alpha=0.3, axis='y')
plt.tight_layout()
plt.savefig('./results/score_distribution.png', dpi=300)
plt.show()

提取指标¶

综合统计¶

import numpy as np

def compute_statistics(all_solutions):
    """Compute summary statistics for an evolution run.

    Args:
        all_solutions: Sequence of solution objects; each must expose
            ``evaluation_res.valid`` and ``evaluation_res.score``.

    Returns:
        A dict with the keys ``total_solutions``, ``valid_solutions`` and
        ``success_rate`` (a percentage; 0.0 when there are no solutions),
        plus ``best_score``, ``worst_score``, ``mean_score``,
        ``median_score``, ``std_score`` and ``score_range``, each of which
        is ``None`` when no solution is valid.
    """
    scores = [
        s.evaluation_res.score for s in all_solutions if s.evaluation_res.valid
    ]
    total = len(all_solutions)
    valid_count = len(scores)

    # Compute the extremes once; they are reused for the range below.
    best = max(scores) if scores else None
    worst = min(scores) if scores else None

    stats = {
        'total_solutions': total,
        'valid_solutions': valid_count,
        # Guard against an empty history, which would divide by zero.
        'success_rate': valid_count / total * 100 if total else 0.0,
        'best_score': best,
        'worst_score': worst,
        'mean_score': np.mean(scores) if scores else None,
        'median_score': np.median(scores) if scores else None,
        'std_score': np.std(scores) if scores else None,
        'score_range': best - worst if scores else None,
    }

    return stats

stats = compute_statistics(all_solutions)


def _fmt_score(value):
    """Format a score statistic to 4 decimals, or "N/A" when it is None.

    compute_statistics returns None for the score statistics when there are
    no valid solutions; formatting None with ':.4f' would raise TypeError.
    """
    return "N/A" if value is None else f"{value:.4f}"


print("进化统计:")
print(f"  总解数: {stats['total_solutions']}")
print(f"  有效解数: {stats['valid_solutions']}")
print(f"  成功率: {stats['success_rate']:.1f}%")
print(f"\n分数统计:")
print(f"  最佳: {_fmt_score(stats['best_score'])}")
print(f"  最差: {_fmt_score(stats['worst_score'])}")
print(f"  平均: {_fmt_score(stats['mean_score'])}")
print(f"  中位数: {_fmt_score(stats['median_score'])}")
print(f"  标准差: {_fmt_score(stats['std_score'])}")
print(f"  范围: {_fmt_score(stats['score_range'])}")

导出到 DataFrame¶

import pandas as pd


def _to_record(index, sol):
    """Flatten one solution into a row dict for the DataFrame."""
    result = sol.evaluation_res
    is_valid = result.valid
    return {
        'index': index,
        'valid': is_valid,
        # Score is recorded only for valid solutions, error only for invalid ones
        'score': result.score if is_valid else None,
        'generation': sol.other_info.get('generation', -1),
        'code_length': len(sol.sol_string),
        'error': None if is_valid else result.error_message,
    }


# One row per solution, in history order
df = pd.DataFrame([_to_record(i, sol) for i, sol in enumerate(all_solutions)])

# Persist to CSV
df.to_csv('./results/evolution_data.csv', index=False)

# Quick analysis
print(df.describe())
print("\n按代数的分数:")
per_generation = df.groupby('generation')['score'].agg(['mean', 'max', 'count'])
print(per_generation)

下一步¶

学习调试与性能分析来排查问题
查看低级 API 获取更多控制选项
查看配置进行参数调优

资源¶

Matplotlib 文档 - 绘图库
Pandas 文档 - 数据分析
NumPy 文档 - 数值计算