disbord/plugins/research_agent/main.py

"""
Research Agent Plugin - Advanced Information Gathering and Analysis
Demonstrates research capabilities with web search, data analysis, and synthesis
"""
import asyncio
import json
import logging
from datetime import datetime, timedelta, timezone
from typing import Any, Dict, List
from extensions.plugin_manager import (PluginMetadata, PluginType,
ResearchAgentPlugin)
logger = logging.getLogger(__name__)
class AdvancedResearchAgent(ResearchAgentPlugin):
"""
Advanced Research Agent Plugin
Features:
- Multi-source information gathering
- Real-time web search integration
- Data synthesis and summarization
- Citation tracking and verification
- Collaborative research sessions
- Research history and caching
"""
@property
def metadata(self) -> PluginMetadata:
return PluginMetadata(
name="research_agent",
version="1.0.0",
description="Advanced research agent with multi-source data gathering and analysis",
author="Discord Quote Bot Team",
plugin_type=PluginType.RESEARCH_AGENT,
dependencies=["memory_system"],
permissions=["web.search", "data.analyze", "cache.write"],
config_schema={
"max_search_results": {"type": "integer", "default": 10},
"search_timeout": {"type": "integer", "default": 30},
"enable_caching": {"type": "boolean", "default": True},
"citation_style": {"type": "string", "default": "apa"},
},
)
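
    # A concrete plugin config might look like the following (illustrative
    # only; how the host's plugin_manager loads and injects it is not
    # defined in this file):
    #
    #   {
    #       "max_search_results": 15,
    #       "search_timeout": 20,
    #       "enable_caching": true,
    #       "citation_style": "apa"
    #   }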
async def on_initialize(self):
"""Initialize the research agent plugin"""
logger.info("Initializing Research Agent Plugin...")
# Configuration
self.max_search_results = self.config.get("max_search_results", 10)
self.search_timeout = self.config.get("search_timeout", 30)
self.enable_caching = self.config.get("enable_caching", True)
self.citation_style = self.config.get("citation_style", "apa")
# Research session tracking
self.active_sessions: Dict[int, Dict[str, Any]] = {}
self.research_cache: Dict[str, Dict[str, Any]] = {}
# Register event handlers
self.register_event_handler("research_request", self.handle_research_request)
self.register_event_handler("analysis_request", self.handle_analysis_request)
logger.info("Research Agent Plugin initialized successfully")
async def search(self, query: str, context: Dict[str, Any]) -> Dict[str, Any]:
"""Perform comprehensive research search"""
try:
user_id = context.get("user_id")
            session_id = context.get(
                "session_id", f"search_{int(datetime.now(timezone.utc).timestamp())}"
            )
# Check cache first
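            # NOTE: hash() is salted per interpreter process, so these keys
            # are only stable for the lifetime of this in-memory cache.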
cache_key = f"search:{hash(query)}"
if self.enable_caching and cache_key in self.research_cache:
cached_result = self.research_cache[cache_key]
if (
datetime.utcnow()
- datetime.fromisoformat(cached_result["timestamp"])
) < timedelta(hours=24):
logger.info(f"Returning cached search results for: {query}")
return cached_result["data"]
# Perform multi-source search
search_results = await self._perform_multi_source_search(query, context)
# Analyze and synthesize results
synthesis = await self._synthesize_results(query, search_results)
# Generate citations
citations = await self._generate_citations(search_results)
# Compile final result
result = {
"query": query,
"session_id": session_id,
"timestamp": datetime.utcnow().isoformat(),
"sources_searched": len(search_results),
"synthesis": synthesis,
"citations": citations,
"raw_results": search_results[:5], # Limit raw data
"confidence": self._calculate_confidence(search_results),
"follow_up_suggestions": await self._generate_follow_up_questions(
query, synthesis
),
}
# Cache result
if self.enable_caching:
self.research_cache[cache_key] = {
"data": result,
"timestamp": datetime.utcnow().isoformat(),
}
# Track in session
if user_id:
await self._update_research_session(user_id, session_id, result)
return result
except Exception as e:
logger.error(f"Error performing search: {e}")
return {
"query": query,
"error": str(e),
"timestamp": datetime.utcnow().isoformat(),
"success": False,
}
async def analyze(self, data: Any, analysis_type: str) -> Dict[str, Any]:
"""Analyze data using various analytical methods"""
try:
analysis_methods = {
"sentiment": self._analyze_sentiment,
"trends": self._analyze_trends,
"summarize": self._summarize_content,
"compare": self._compare_sources,
"fact_check": self._fact_check,
"bias_check": self._bias_analysis,
}
if analysis_type not in analysis_methods:
return {
"error": f"Unknown analysis type: {analysis_type}",
"available_types": list(analysis_methods.keys()),
}
# Perform analysis
result = await analysis_methods[analysis_type](data)
return {
"analysis_type": analysis_type,
"timestamp": datetime.utcnow().isoformat(),
"result": result,
"confidence": getattr(result, "confidence", 0.8),
"methodology": self._get_analysis_methodology(analysis_type),
}
except Exception as e:
logger.error(f"Error performing analysis: {e}")
return {"error": str(e), "analysis_type": analysis_type, "success": False}
async def handle_research_request(self, **kwargs):
"""Handle research request event"""
try:
query = kwargs.get("query")
user_id = kwargs.get("user_id")
context = kwargs.get("context", {})
if not query:
return {"error": "No query provided"}
# Add user context
context.update(
{
"user_id": user_id,
"request_type": "research",
"timestamp": datetime.utcnow().isoformat(),
}
)
# Perform search
result = await self.search(query, context)
# Generate user-friendly response
response = await self._format_research_response(result)
return {"response": response, "detailed_results": result, "success": True}
except Exception as e:
logger.error(f"Error handling research request: {e}")
return {"error": str(e), "success": False}
async def handle_analysis_request(self, **kwargs):
"""Handle analysis request event"""
try:
data = kwargs.get("data")
analysis_type = kwargs.get("analysis_type", "summarize")
kwargs.get("user_id")
if not data:
return {"error": "No data provided for analysis"}
# Perform analysis
result = await self.analyze(data, analysis_type)
return result
except Exception as e:
logger.error(f"Error handling analysis request: {e}")
return {"error": str(e), "success": False}
async def _perform_multi_source_search(
self, query: str, context: Dict[str, Any]
) -> List[Dict[str, Any]]:
"""Perform search across multiple sources"""
try:
search_sources = [
self._search_web,
self._search_knowledge_base,
self._search_memory_system,
]
# Execute searches concurrently
search_tasks = [source(query, context) for source in search_sources]
source_results = await asyncio.gather(*search_tasks, return_exceptions=True)
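            # return_exceptions=True keeps one failed source from cancelling
            # the rest; exceptions surface as list items and are skipped below.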
# Combine and clean results
all_results = []
for i, results in enumerate(source_results):
if isinstance(results, Exception):
logger.error(f"Search source {i} failed: {results}")
continue
if isinstance(results, list):
all_results.extend(results)
# Remove duplicates and rank by relevance
deduplicated = self._deduplicate_results(all_results)
ranked_results = self._rank_results(deduplicated, query)
return ranked_results[: self.max_search_results]
except Exception as e:
logger.error(f"Error in multi-source search: {e}")
return []
async def _search_web(
self, query: str, context: Dict[str, Any]
) -> List[Dict[str, Any]]:
"""Search web sources (placeholder implementation)"""
try:
# This would integrate with actual web search APIs
# For demonstration, returning mock results
return [
{
"title": f'Web Result for "{query}"',
"url": "https://example.com/article1",
"snippet": f"This is a comprehensive article about {query}...",
"source": "web",
"relevance": 0.9,
"date": datetime.utcnow().isoformat(),
"type": "article",
},
{
"title": f"Research Paper: {query}",
"url": "https://academic.example.com/paper1",
"snippet": f"Academic research on {query} shows...",
"source": "academic",
"relevance": 0.95,
"date": (datetime.utcnow() - timedelta(days=30)).isoformat(),
"type": "paper",
},
]
except Exception as e:
logger.error(f"Web search error: {e}")
return []
async def _search_knowledge_base(
self, query: str, context: Dict[str, Any]
) -> List[Dict[str, Any]]:
"""Search internal knowledge base"""
try:
# Search memory system for relevant information
if self.memory_manager:
memories = await self.memory_manager.search_memories(query, limit=5)
results = []
for memory in memories:
results.append(
{
"title": f'Internal Knowledge: {memory.get("title", "Untitled")}',
"content": memory.get("content", ""),
"source": "knowledge_base",
"relevance": memory.get("similarity", 0.8),
"date": memory.get(
"timestamp", datetime.utcnow().isoformat()
),
"type": "internal",
}
)
return results
return []
except Exception as e:
logger.error(f"Knowledge base search error: {e}")
return []
async def _search_memory_system(
self, query: str, context: Dict[str, Any]
) -> List[Dict[str, Any]]:
"""Search conversation and interaction memory"""
try:
# Search for relevant past conversations and interactions
user_id = context.get("user_id")
if user_id and self.memory_manager:
user_memories = await self.memory_manager.get_user_memories(
user_id, query
)
results = []
for memory in user_memories:
results.append(
{
"title": "Previous Conversation",
"content": memory.get("summary", ""),
"source": "memory",
"relevance": memory.get("relevance", 0.7),
"date": memory.get("timestamp"),
"type": "conversation",
}
)
return results
return []
except Exception as e:
logger.error(f"Memory search error: {e}")
return []
async def _synthesize_results(
self, query: str, results: List[Dict[str, Any]]
) -> Dict[str, Any]:
"""Synthesize search results into coherent summary"""
try:
if not results:
return {
"summary": "No relevant information found.",
"key_points": [],
"confidence": 0.0,
}
# Use AI to synthesize information
synthesis_prompt = f"""
Based on the following search results for "{query}", provide a comprehensive synthesis:
Results:
{json.dumps([{k: v for k, v in r.items() if k in ['title', 'snippet', 'content']} for r in results[:5]], indent=2)}
Provide:
1. A concise summary (2-3 paragraphs)
2. Key points (bullet list)
3. Different perspectives if any
4. Reliability assessment
"""
ai_response = await self.ai_manager.generate_text(
synthesis_prompt, provider="openai", model="gpt-4", max_tokens=800
)
# Parse AI response (simplified)
return {
"summary": ai_response.get("content", "Unable to generate synthesis"),
"key_points": self._extract_key_points(results),
"perspectives": self._identify_perspectives(results),
"confidence": self._calculate_synthesis_confidence(results),
}
except Exception as e:
logger.error(f"Error synthesizing results: {e}")
return {
"summary": "Error generating synthesis",
"key_points": [],
"confidence": 0.0,
}
async def _generate_citations(self, results: List[Dict[str, Any]]) -> List[str]:
"""Generate properly formatted citations"""
citations = []
for i, result in enumerate(results[:5], 1):
try:
if self.citation_style == "apa":
citation = self._format_apa_citation(result, i)
else:
citation = self._format_basic_citation(result, i)
citations.append(citation)
except Exception as e:
logger.error(f"Error formatting citation: {e}")
return citations
def _format_apa_citation(self, result: Dict[str, Any], index: int) -> str:
"""Format citation in APA style"""
title = result.get("title", "Untitled")
url = result.get("url", "")
date = result.get("date", datetime.utcnow().isoformat())
# Simplified APA format
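        # Example output (hypothetical data):
        #   [1] Research Paper: quantum computing. Retrieved 2025-08-27 from https://academic.example.com/paper1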
return f"[{index}] {title}. Retrieved {date[:10]} from {url}"
def _format_basic_citation(self, result: Dict[str, Any], index: int) -> str:
"""Format basic citation"""
title = result.get("title", "Untitled")
source = result.get("source", "Unknown")
return f"[{index}] {title} ({source})"
async def _generate_follow_up_questions(
self, original_query: str, synthesis: Dict[str, Any]
) -> List[str]:
"""Generate relevant follow-up questions"""
try:
# Generate intelligent follow-up questions
return [
f"What are the latest developments in {original_query}?",
f"What are the main challenges related to {original_query}?",
f"How does {original_query} compare to similar topics?",
f"What are expert opinions on {original_query}?",
]
except Exception as e:
logger.error(f"Error generating follow-up questions: {e}")
return []
def _deduplicate_results(
self, results: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
"""Remove duplicate results"""
seen_titles = set()
unique_results = []
for result in results:
title = result.get("title", "").lower()
if title not in seen_titles:
seen_titles.add(title)
unique_results.append(result)
return unique_results
def _rank_results(
self, results: List[Dict[str, Any]], query: str
) -> List[Dict[str, Any]]:
"""Rank results by relevance"""
# Simple ranking by relevance score and source type
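        # Worked example: an "academic" hit with relevance 0.95 scores
        # 0.95 * 1.0 = 0.95, while a "web" hit with relevance 0.90 scores
        # 0.90 * 0.8 = 0.72, so the academic result ranks first.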
def ranking_key(result):
relevance = result.get("relevance", 0.5)
source_weight = {
"academic": 1.0,
"web": 0.8,
"knowledge_base": 0.9,
"memory": 0.6,
}.get(result.get("source", "web"), 0.5)
return relevance * source_weight
return sorted(results, key=ranking_key, reverse=True)
def _calculate_confidence(self, results: List[Dict[str, Any]]) -> float:
"""Calculate overall confidence in search results"""
if not results:
return 0.0
# Factor in number of sources, relevance scores, and source diversity
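        # Worked example: 5 results with average relevance 0.9 drawn from 3
        # source types gives (0.9 + 3/4 + 5/10) / 3 = 2.15 / 3 ~= 0.72.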
avg_relevance = sum(r.get("relevance", 0.5) for r in results) / len(results)
source_diversity = (
len(set(r.get("source", "unknown") for r in results)) / 4.0
) # Max 4 source types
result_count_factor = min(len(results) / 10.0, 1.0) # Up to 10 results
return min((avg_relevance + source_diversity + result_count_factor) / 3.0, 1.0)
def _extract_key_points(self, results: List[Dict[str, Any]]) -> List[str]:
"""Extract key points from results"""
key_points = []
for result in results[:3]: # Top 3 results
content = result.get("snippet", "") or result.get("content", "")
if content:
# Simplified key point extraction
key_points.append(
content[:200] + "..." if len(content) > 200 else content
)
return key_points
def _identify_perspectives(self, results: List[Dict[str, Any]]) -> List[str]:
"""Identify different perspectives in results"""
# Simplified perspective identification
perspectives = []
source_types = set(r.get("source", "unknown") for r in results)
for source_type in source_types:
perspectives.append(f"{source_type.title()} perspective")
return perspectives
def _calculate_synthesis_confidence(self, results: List[Dict[str, Any]]) -> float:
"""Calculate confidence in synthesis quality"""
return min(len(results) / 5.0, 1.0) # Higher confidence with more sources
async def _analyze_sentiment(self, data: Any) -> Dict[str, Any]:
"""Analyze sentiment of data"""
# Placeholder implementation
return {
"sentiment": "neutral",
"confidence": 0.8,
"details": "Sentiment analysis not fully implemented",
}
async def _analyze_trends(self, data: Any) -> Dict[str, Any]:
"""Analyze trends in data"""
# Placeholder implementation
return {"trends": ["stable"], "confidence": 0.7, "timeframe": "30 days"}
async def _summarize_content(self, data: Any) -> Dict[str, Any]:
"""Summarize content"""
# Use AI to summarize
if isinstance(data, str) and len(data) > 500:
summary_prompt = (
f"Summarize this content in 2-3 sentences:\n\n{data[:2000]}"
)
try:
result = await self.ai_manager.generate_text(
summary_prompt,
provider="openai",
model="gpt-3.5-turbo",
max_tokens=200,
)
return {
"summary": result.get("content", "Unable to generate summary"),
"confidence": 0.9,
}
except Exception as e:
logger.error(f"Summarization error: {e}")
return {
"summary": str(data)[:300] + "..." if len(str(data)) > 300 else str(data),
"confidence": 0.6,
}
async def _compare_sources(self, data: Any) -> Dict[str, Any]:
"""Compare multiple sources"""
# Placeholder implementation
return {
"comparison": "Source comparison not fully implemented",
"confidence": 0.5,
}
async def _fact_check(self, data: Any) -> Dict[str, Any]:
"""Perform fact checking"""
# Placeholder implementation
return {
"fact_check_result": "indeterminate",
"confidence": 0.5,
"notes": "Fact checking requires external verification services",
}
async def _bias_analysis(self, data: Any) -> Dict[str, Any]:
"""Analyze potential bias"""
# Placeholder implementation
return {
"bias_detected": False,
"confidence": 0.6,
"analysis": "Bias analysis not fully implemented",
}
def _get_analysis_methodology(self, analysis_type: str) -> str:
"""Get methodology description for analysis type"""
methodologies = {
"sentiment": "Natural language processing with machine learning sentiment classification",
"trends": "Statistical analysis of data patterns over time",
"summarize": "AI-powered text summarization using transformer models",
"compare": "Comparative analysis using similarity metrics",
"fact_check": "Cross-reference verification with trusted sources",
"bias_check": "Multi-dimensional bias detection using linguistic analysis",
}
return methodologies.get(analysis_type, "Standard analytical methodology")
async def _update_research_session(
self, user_id: int, session_id: str, result: Dict[str, Any]
):
"""Update research session tracking"""
try:
if user_id not in self.active_sessions:
self.active_sessions[user_id] = {}
self.active_sessions[user_id][session_id] = {
"timestamp": datetime.utcnow().isoformat(),
"query": result["query"],
"result_summary": result.get("synthesis", {}).get("summary", ""),
"sources_count": result.get("sources_searched", 0),
"confidence": result.get("confidence", 0.0),
}
except Exception as e:
logger.error(f"Error updating research session: {e}")
async def _format_research_response(self, result: Dict[str, Any]) -> str:
"""Format research result for user presentation"""
try:
query = result.get("query", "Unknown query")
synthesis = result.get("synthesis", {})
summary = synthesis.get("summary", "No summary available")
confidence = result.get("confidence", 0.0)
sources_count = result.get("sources_searched", 0)
response = f"**Research Results for: {query}**\n\n"
response += f"{summary}\n\n"
response += (
f"*Searched {sources_count} sources with {confidence:.1%} confidence*"
)
# Add follow-up suggestions
follow_ups = result.get("follow_up_suggestions", [])
if follow_ups:
response += "\n\n**Follow-up questions:**\n"
for i, question in enumerate(follow_ups[:3], 1):
response += f"{i}. {question}\n"
return response
except Exception as e:
logger.error(f"Error formatting response: {e}")
return "Error formatting research results"
# Plugin entry point
main = AdvancedResearchAgent
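
# Illustrative usage sketch (assumes the host plugin framework constructs the
# plugin, injects config/ai_manager/memory_manager, and runs an event loop;
# none of that wiring is defined in this file):
#
#   agent = main()
#   await agent.on_initialize()
#   result = await agent.search("quantum computing", {"user_id": 1234})
#   print(result["synthesis"]["summary"])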