How to build risk-aware AI agents with internal criticism, self-consistency reasoning, and uncertainty estimation to make reliable decisions

by
0 comments
How to build risk-aware AI agents with internal criticism, self-consistency reasoning, and uncertainty estimation to make reliable decisions

class AgentAnalyzer:
    """Plotting helpers for inspecting critic-augmented agent outputs.

    NOTE(review): this file was recovered from a web scrape that replaced
    every square bracket with parentheses and stripped backslash escapes;
    the indexing/list syntax and "\n" escapes are restored here.
    """

    @staticmethod
    def plot_response_distribution(result: Dict):
        """Draw a 2x2 dashboard for one generation result.

        Panels: critic scores, per-response confidence, critic score
        components, and uncertainty metrics.

        Args:
            result: dict with keys 'all_responses', 'critic_scores',
                'uncertainty', and 'selected_index' (as produced by
                CriticAugmentedAgent.generate_with_critic — confirm keys
                against that method).
        """
        fig, axes = plt.subplots(2, 2, figsize=(14, 10))
        fig.suptitle('Agent Response Analysis', fontsize=16, fontweight="bold")

        responses = result['all_responses']
        scores = result['critic_scores']
        uncertainty = result['uncertainty']
        selected_idx = result['selected_index']

        # Top-left: overall critic score per sampled response.
        ax = axes[0, 0]
        score_values = [s.overall_score for s in scores]
        bars = ax.bar(range(len(scores)), score_values, alpha=0.7)
        bars[selected_idx].set_color('green')  # highlight the chosen response
        bars[selected_idx].set_alpha(1.0)
        ax.axhline(np.mean(score_values), color="red", linestyle="--",
                   label=f'Mean: {np.mean(score_values):.3f}')
        ax.set_xlabel('Response Index')
        ax.set_ylabel('Critic Score')
        ax.set_title('Critic Scores for Each Response')
        ax.legend()
        ax.grid(True, alpha=0.3)

        # Top-right: the model's self-reported confidence per response.
        ax = axes[0, 1]
        confidences = [r.confidence for r in responses]
        bars = ax.bar(range(len(responses)), confidences, alpha=0.7, color="orange")
        bars[selected_idx].set_color('green')
        bars[selected_idx].set_alpha(1.0)
        ax.axhline(np.mean(confidences), color="red", linestyle="--",
                   label=f'Mean: {np.mean(confidences):.3f}')
        ax.set_xlabel('Response Index')
        ax.set_ylabel('Confidence')
        ax.set_title('Model Confidence per Response')
        ax.legend()
        ax.grid(True, alpha=0.3)

        # Bottom-left: grouped bars for the individual critic components.
        ax = axes[1, 0]
        components = {
            'Accuracy': [s.accuracy_score for s in scores],
            'Coherence': [s.coherence_score for s in scores],
            'Safety': [s.safety_score for s in scores],
        }
        x = np.arange(len(responses))
        width = 0.25
        for i, (name, values) in enumerate(components.items()):
            offset = (i - 1) * width  # center the three bar groups on each tick
            ax.bar(x + offset, values, width, label=name, alpha=0.8)
        ax.set_xlabel('Response Index')
        ax.set_ylabel('Score')
        ax.set_title('Critic Score Components')
        ax.set_xticks(x)
        ax.legend()
        ax.grid(True, alpha=0.3, axis="y")

        # Bottom-right: horizontal bars for the uncertainty estimates.
        ax = axes[1, 1]
        uncertainty_metrics = {
            'Entropy': uncertainty.entropy,
            'Variance': uncertainty.variance,
            'Consistency': uncertainty.consistency_score,
            'Epistemic': uncertainty.epistemic_uncertainty,
            'Aleatoric': uncertainty.aleatoric_uncertainty,
        }
        ax.barh(list(uncertainty_metrics.keys()),
                list(uncertainty_metrics.values()), alpha=0.7)
        ax.set_xlabel('Value')
        ax.set_title(f'Uncertainty Estimates (Risk: {uncertainty.risk_level()})')
        ax.grid(True, alpha=0.3, axis="x")

        plt.tight_layout()
        plt.show()

    @staticmethod
    def plot_strategy_comparison(agent: CriticAugmentedAgent, prompt: str,
                                 ground_truth: Optional[str] = None):
        """Run every selection strategy on *prompt* and plot the outcomes.

        Args:
            agent: the agent to query.
            prompt: prompt sent once per strategy.
            ground_truth: optional reference answer forwarded to the agent.

        Returns:
            dict mapping strategy name -> generation result dict.
        """
        strategies = ["best_score", "most_confident", "most_consistent", "risk_adjusted"]
        results = {}

        print("Comparing selection strategies...\n")

        for strategy in strategies:
            print(f"Testing strategy: {strategy}")
            result = agent.generate_with_critic(prompt, ground_truth,
                                                strategy=strategy, verbose=False)
            results[strategy] = result

        fig, axes = plt.subplots(1, 2, figsize=(14, 5))
        fig.suptitle('Strategy Comparison', fontsize=16, fontweight="bold")

        # Left: critic score of the response each strategy selected.
        ax = axes[0]
        selected_scores = [
            results[s]['critic_scores'][results[s]['selected_index']].overall_score
            for s in strategies
        ]
        ax.bar(strategies, selected_scores, alpha=0.7, color="steelblue")
        ax.set_ylabel('Critic Score')
        ax.set_title('Selected Response Quality by Strategy')
        ax.set_xticklabels(strategies, rotation=45, ha="right")
        ax.grid(True, alpha=0.3, axis="y")

        # Right: confidence vs. quality scatter, one point per strategy.
        ax = axes[1]
        for strategy in strategies:
            result = results[strategy]
            selected_idx = result['selected_index']
            confidence = result['all_responses'][selected_idx].confidence
            score = result['critic_scores'][selected_idx].overall_score
            ax.scatter(confidence, score, s=200, alpha=0.6, label=strategy)
        ax.set_xlabel('Confidence')
        ax.set_ylabel('Critic Score')
        ax.set_title('Confidence vs Quality Trade-off')
        ax.legend()
        ax.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

        return results


def run_basic_demo():
    """Demo 1: run one prompt through the critic-augmented agent and plot it.

    Returns:
        The generation result dict from the agent.
    """
    # Scrape artifact fixed: "n" restored to the "\n" escape in prints below.
    print("\n" + "=" * 80)
    print("DEMO 1: Basic Agent with Critic")
    print("=" * 80 + "\n")

    agent = CriticAugmentedAgent(
        model_quality=0.8,
        risk_tolerance=0.3,
        n_samples=5,
    )

    prompt = "What is 15 + 27?"
    ground_truth = "42"

    result = agent.generate_with_critic(
        prompt=prompt,
        ground_truth=ground_truth,
        strategy="risk_adjusted",
        temperature=0.8,
    )

    print("\n📊 Generating visualizations...")
    AgentAnalyzer.plot_response_distribution(result)

    return result


def run_strategy_comparison():
    """Demo 2: compare all selection strategies on a single prompt.

    Returns:
        dict mapping strategy name -> generation result
        (from AgentAnalyzer.plot_strategy_comparison).
    """
    print("\n" + "=" * 80)
    print("DEMO 2: Strategy Comparison")
    print("=" * 80 + "\n")

    agent = CriticAugmentedAgent(
        model_quality=0.75,
        risk_tolerance=0.5,
        n_samples=6,
    )

    prompt = "What is 23 + 19?"
    ground_truth = "42"

    results = AgentAnalyzer.plot_strategy_comparison(agent, prompt, ground_truth)

    return results


def run_uncertainty_analysis():
    """Demo 3: sweep model quality and plot entropy / consistency trends."""
    print("\n" + "=" * 80)
    print("DEMO 3: Uncertainty Analysis")
    print("=" * 80 + "\n")

    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # Scrape artifact fixed: these were list literals, not tuples-of-calls.
    qualities = [0.5, 0.6, 0.7, 0.8, 0.9]
    uncertainties = []
    consistencies = []

    prompt = "What is 30 + 12?"

    print("Testing model quality impact on uncertainty...\n")
    for quality in qualities:
        agent = CriticAugmentedAgent(model_quality=quality, n_samples=8)
        result = agent.generate_with_critic(prompt, verbose=False)
        uncertainties.append(result['uncertainty'].entropy)
        consistencies.append(result['uncertainty'].consistency_score)
        print(f"Quality: {quality:.1f} -> Entropy: {result['uncertainty'].entropy:.3f}, "
              f"Consistency: {result['uncertainty'].consistency_score:.3f}")

    # Left panel: answer entropy vs model quality.
    ax = axes[0]
    ax.plot(qualities, uncertainties, 'o-', linewidth=2, markersize=8, label="Entropy")
    ax.set_xlabel('Model Quality')
    ax.set_ylabel('Entropy')
    ax.set_title('Uncertainty vs Model Quality')
    ax.grid(True, alpha=0.3)
    ax.legend()

    # Right panel: self-consistency vs model quality.
    ax = axes[1]
    ax.plot(qualities, consistencies, 's-', linewidth=2, markersize=8,
            color="green", label="Consistency")
    ax.set_xlabel('Model Quality')
    ax.set_ylabel('Consistency Score')
    ax.set_title('Self-Consistency vs Model Quality')
    ax.grid(True, alpha=0.3)
    ax.legend()

    plt.tight_layout()
    plt.show()


def run_risk_sensitivity_demo():
    """Demo 4: sweep risk tolerance and plot its effect on selection."""
    print("\n" + "=" * 80)
    print("DEMO 4: Risk Sensitivity Analysis")
    print("=" * 80 + "\n")

    prompt = "What is 18 + 24?"
    risk_tolerances = [0.1, 0.3, 0.5, 0.7, 0.9]

    # One parallel list per tracked metric, appended to per sweep step.
    results = {
        'risk_tolerance': [],
        'selected_confidence': [],
        'selected_score': [],
        'uncertainty': [],
    }

    print("Testing different risk tolerance levels...\n")
    for risk_tol in risk_tolerances:
        agent = CriticAugmentedAgent(
            model_quality=0.75,
            risk_tolerance=risk_tol,
            n_samples=6,
        )
        result = agent.generate_with_critic(prompt, verbose=False)

        selected_idx = result['selected_index']
        results['risk_tolerance'].append(risk_tol)
        results['selected_confidence'].append(
            result['all_responses'][selected_idx].confidence
        )
        results['selected_score'].append(
            result['critic_scores'][selected_idx].overall_score
        )
        results['uncertainty'].append(result['uncertainty'].entropy)

        print(f"Risk Tolerance: {risk_tol:.1f} -> "
              f"Confidence: {results['selected_confidence'][-1]:.3f}, "
              f"Score: {results['selected_score'][-1]:.3f}")

    fig, ax = plt.subplots(1, 1, figsize=(10, 6))
    ax.plot(results['risk_tolerance'], results['selected_confidence'], 'o-',
            linewidth=2, markersize=8, label="Selected Confidence")
    ax.plot(results['risk_tolerance'], results['selected_score'], 's-',
            linewidth=2, markersize=8, label="Selected Score")
    ax.set_xlabel('Risk Tolerance')
    ax.set_ylabel('Value')
    ax.set_title('Risk Tolerance Impact on Selection')
    ax.legend()
    ax.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()


def demonstrate_verbalized_uncertainty():
    """Research topic: render the uncertainty estimates as a prose report."""
    print("\n" + "=" * 80)
    print("RESEARCH TOPIC: Verbalized Uncertainty")
    print("=" * 80 + "\n")

    print("Concept: Agent not only estimates uncertainty but explains it.\n")

    agent = CriticAugmentedAgent(model_quality=0.7, n_samples=5)
    prompt = "What is 25 + 17?"
    result = agent.generate_with_critic(prompt, verbose=False)

    uncertainty = result['uncertainty']

    # Thresholds (0.5/1.0 for entropy, 0.3/0.6 for the others) bucket each
    # metric into Low/Medium/High for the human-readable report.
    explanation = f"""
Uncertainty Analysis Report:
---------------------------
Risk Level: {uncertainty.risk_level()}


Detailed Breakdown:
• Answer Entropy: {uncertainty.entropy:.3f}
 → {'Low' if uncertainty.entropy < 0.5 else 'Medium' if uncertainty.entropy < 1.0 else 'High'} disagreement among generated responses


• Self-Consistency: {uncertainty.consistency_score:.3f}
 → {int(uncertainty.consistency_score * 100)}% of responses agree on the answer


• Epistemic Uncertainty: {uncertainty.epistemic_uncertainty:.3f}
 → {'Low' if uncertainty.epistemic_uncertainty < 0.3 else 'Medium' if uncertainty.epistemic_uncertainty < 0.6 else 'High'} model uncertainty (knowledge gaps)


• Aleatoric Uncertainty: {uncertainty.aleatoric_uncertainty:.3f}
 → {'Low' if uncertainty.aleatoric_uncertainty < 0.3 else 'Medium' if uncertainty.aleatoric_uncertainty < 0.6 else 'High'} data uncertainty (inherent randomness)


Recommendation:
"""

    if uncertainty.risk_level() == "LOW":
        explanation += "✓ High confidence in answer - safe to trust"
    elif uncertainty.risk_level() == "MEDIUM":
        explanation += "⚠ Moderate confidence - consider verification"
    else:
        explanation += "⚠ Low confidence - strongly recommend verification"

    print(explanation)


def demonstrate_self_consistency():
    """Research topic: majority voting across multiple reasoning paths."""
    print("\n" + "=" * 80)
    print("RESEARCH TOPIC: Self-Consistency Reasoning")
    print("=" * 80 + "\n")

    print("Concept: Generate multiple reasoning paths, select most common answer.\n")

    agent = CriticAugmentedAgent(model_quality=0.75, n_samples=7)
    prompt = "What is 35 + 7?"
    result = agent.generate_with_critic(prompt, strategy="most_consistent", verbose=False)

    # NOTE(review): relies on UncertaintyEstimator's private _extract_answer;
    # consider exposing a public accessor.
    estimator = UncertaintyEstimator()
    answers = [estimator._extract_answer(r.content) for r in result['all_responses']]

    print("Generated Responses and Answers:")
    print("-" * 80)
    for i, (response, answer) in enumerate(zip(result['all_responses'], answers)):
        marker = "✓ SELECTED" if i == result['selected_index'] else ""
        print(f"\nResponse {i}: {answer} {marker}")
        print(f"  Confidence: {response.confidence:.3f}")
        print(f"  Content: {response.content[:80]}...")

    from collections import Counter
    answer_dist = Counter(answers)

    print(f"\n\nAnswer Distribution:")
    print("-" * 80)
    for answer, count in answer_dist.most_common():
        percentage = (count / len(answers)) * 100
        bar = "█" * int(percentage / 5)  # one block glyph per 5 percentage points
        print(f"{answer:>10}: {bar} {count}/{len(answers)} ({percentage:.1f}%)")

    print(f"\nMost Consistent Answer: {answer_dist.most_common(1)[0][0]}")
    print(f"Consistency Score: {result['uncertainty'].consistency_score:.3f}")


def main():
    """Run every demo in sequence, printing a summary on success."""
    print("\n" + "🎯" * 40)
    print("ADVANCED AGENT WITH INTERNAL CRITIC + UNCERTAINTY ESTIMATION")
    print("Tutorial and Demonstrations")
    print("🎯" * 40)

    plt.style.use('seaborn-v0_8-darkgrid')
    sns.set_palette("husl")

    try:
        run_basic_demo()
        run_strategy_comparison()
        run_uncertainty_analysis()
        run_risk_sensitivity_demo()
        demonstrate_verbalized_uncertainty()
        demonstrate_self_consistency()

        print("\n" + "=" * 80)
        print("✅ ALL DEMONSTRATIONS COMPLETED SUCCESSFULLY")
        print("=" * 80)
        print("""
Key Takeaways:
1. Internal critics improve response quality through multi-dimensional evaluation
2. Uncertainty estimation enables risk-aware decision making
3. Self-consistency reasoning increases reliability
4. Different selection strategies optimize for different objectives
5. Verbalized uncertainty helps users understand model confidence


Next Steps:
• Implement with real LLM APIs (OpenAI, Anthropic, etc.)
• Add learned critic models (fine-tuned classifiers)
• Explore ensemble methods and meta-learning
• Integrate with retrieval-augmented generation (RAG)
• Deploy in production with monitoring and feedback loops
        """)

    # Top-level demo boundary: report the failure with a traceback rather
    # than crash, so a partial run still shows its output.
    except Exception as e:
        print(f"\n❌ Error during demonstration: {e}")
        import traceback
        traceback.print_exc()


# Script entry point: run all demos when executed directly.
if __name__ == "__main__":
   main()

Related Articles

Leave a Comment