- Deleted .env.example file as it is no longer needed. - Added .gitignore to manage ignored files and directories. - Introduced CLAUDE.md for AI provider integration documentation. - Created dev.sh for development setup and scripts. - Updated Dockerfile and Dockerfile.production for improved build processes. - Added multiple test files and directories for comprehensive testing. - Introduced new utility and service files for enhanced functionality. - Organized codebase with new directories and files for better maintainability.
646 lines
24 KiB
Python
646 lines
24 KiB
Python
"""
|
|
Research Agent Plugin - Advanced Information Gathering and Analysis
|
|
Demonstrates research capabilities with web search, data analysis, and synthesis
|
|
"""
|
|
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
from datetime import datetime, timedelta
|
|
from typing import Any, Dict, List
|
|
|
|
from extensions.plugin_manager import (PluginMetadata, PluginType,
|
|
ResearchAgentPlugin)
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class AdvancedResearchAgent(ResearchAgentPlugin):
|
|
"""
|
|
Advanced Research Agent Plugin
|
|
|
|
Features:
|
|
- Multi-source information gathering
|
|
- Real-time web search integration
|
|
- Data synthesis and summarization
|
|
- Citation tracking and verification
|
|
- Collaborative research sessions
|
|
- Research history and caching
|
|
"""
|
|
|
|
@property
|
|
def metadata(self) -> PluginMetadata:
|
|
return PluginMetadata(
|
|
name="research_agent",
|
|
version="1.0.0",
|
|
description="Advanced research agent with multi-source data gathering and analysis",
|
|
author="Discord Quote Bot Team",
|
|
plugin_type=PluginType.RESEARCH_AGENT,
|
|
dependencies=["memory_system"],
|
|
permissions=["web.search", "data.analyze", "cache.write"],
|
|
config_schema={
|
|
"max_search_results": {"type": "integer", "default": 10},
|
|
"search_timeout": {"type": "integer", "default": 30},
|
|
"enable_caching": {"type": "boolean", "default": True},
|
|
"citation_style": {"type": "string", "default": "apa"},
|
|
},
|
|
)
|
|
|
|
async def on_initialize(self):
|
|
"""Initialize the research agent plugin"""
|
|
logger.info("Initializing Research Agent Plugin...")
|
|
|
|
# Configuration
|
|
self.max_search_results = self.config.get("max_search_results", 10)
|
|
self.search_timeout = self.config.get("search_timeout", 30)
|
|
self.enable_caching = self.config.get("enable_caching", True)
|
|
self.citation_style = self.config.get("citation_style", "apa")
|
|
|
|
# Research session tracking
|
|
self.active_sessions: Dict[int, Dict[str, Any]] = {}
|
|
self.research_cache: Dict[str, Dict[str, Any]] = {}
|
|
|
|
# Register event handlers
|
|
self.register_event_handler("research_request", self.handle_research_request)
|
|
self.register_event_handler("analysis_request", self.handle_analysis_request)
|
|
|
|
logger.info("Research Agent Plugin initialized successfully")
|
|
|
|
async def search(self, query: str, context: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Perform comprehensive research search"""
|
|
try:
|
|
user_id = context.get("user_id")
|
|
session_id = context.get(
|
|
"session_id", f"search_{int(datetime.utcnow().timestamp())}"
|
|
)
|
|
|
|
# Check cache first
|
|
cache_key = f"search:{hash(query)}"
|
|
if self.enable_caching and cache_key in self.research_cache:
|
|
cached_result = self.research_cache[cache_key]
|
|
if (
|
|
datetime.utcnow()
|
|
- datetime.fromisoformat(cached_result["timestamp"])
|
|
) < timedelta(hours=24):
|
|
logger.info(f"Returning cached search results for: {query}")
|
|
return cached_result["data"]
|
|
|
|
# Perform multi-source search
|
|
search_results = await self._perform_multi_source_search(query, context)
|
|
|
|
# Analyze and synthesize results
|
|
synthesis = await self._synthesize_results(query, search_results)
|
|
|
|
# Generate citations
|
|
citations = await self._generate_citations(search_results)
|
|
|
|
# Compile final result
|
|
result = {
|
|
"query": query,
|
|
"session_id": session_id,
|
|
"timestamp": datetime.utcnow().isoformat(),
|
|
"sources_searched": len(search_results),
|
|
"synthesis": synthesis,
|
|
"citations": citations,
|
|
"raw_results": search_results[:5], # Limit raw data
|
|
"confidence": self._calculate_confidence(search_results),
|
|
"follow_up_suggestions": await self._generate_follow_up_questions(
|
|
query, synthesis
|
|
),
|
|
}
|
|
|
|
# Cache result
|
|
if self.enable_caching:
|
|
self.research_cache[cache_key] = {
|
|
"data": result,
|
|
"timestamp": datetime.utcnow().isoformat(),
|
|
}
|
|
|
|
# Track in session
|
|
if user_id:
|
|
await self._update_research_session(user_id, session_id, result)
|
|
|
|
return result
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error performing search: {e}")
|
|
return {
|
|
"query": query,
|
|
"error": str(e),
|
|
"timestamp": datetime.utcnow().isoformat(),
|
|
"success": False,
|
|
}
|
|
|
|
async def analyze(self, data: Any, analysis_type: str) -> Dict[str, Any]:
|
|
"""Analyze data using various analytical methods"""
|
|
try:
|
|
analysis_methods = {
|
|
"sentiment": self._analyze_sentiment,
|
|
"trends": self._analyze_trends,
|
|
"summarize": self._summarize_content,
|
|
"compare": self._compare_sources,
|
|
"fact_check": self._fact_check,
|
|
"bias_check": self._bias_analysis,
|
|
}
|
|
|
|
if analysis_type not in analysis_methods:
|
|
return {
|
|
"error": f"Unknown analysis type: {analysis_type}",
|
|
"available_types": list(analysis_methods.keys()),
|
|
}
|
|
|
|
# Perform analysis
|
|
result = await analysis_methods[analysis_type](data)
|
|
|
|
return {
|
|
"analysis_type": analysis_type,
|
|
"timestamp": datetime.utcnow().isoformat(),
|
|
"result": result,
|
|
"confidence": getattr(result, "confidence", 0.8),
|
|
"methodology": self._get_analysis_methodology(analysis_type),
|
|
}
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error performing analysis: {e}")
|
|
return {"error": str(e), "analysis_type": analysis_type, "success": False}
|
|
|
|
async def handle_research_request(self, **kwargs):
|
|
"""Handle research request event"""
|
|
try:
|
|
query = kwargs.get("query")
|
|
user_id = kwargs.get("user_id")
|
|
context = kwargs.get("context", {})
|
|
|
|
if not query:
|
|
return {"error": "No query provided"}
|
|
|
|
# Add user context
|
|
context.update(
|
|
{
|
|
"user_id": user_id,
|
|
"request_type": "research",
|
|
"timestamp": datetime.utcnow().isoformat(),
|
|
}
|
|
)
|
|
|
|
# Perform search
|
|
result = await self.search(query, context)
|
|
|
|
# Generate user-friendly response
|
|
response = await self._format_research_response(result)
|
|
|
|
return {"response": response, "detailed_results": result, "success": True}
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error handling research request: {e}")
|
|
return {"error": str(e), "success": False}
|
|
|
|
async def handle_analysis_request(self, **kwargs):
|
|
"""Handle analysis request event"""
|
|
try:
|
|
data = kwargs.get("data")
|
|
analysis_type = kwargs.get("analysis_type", "summarize")
|
|
kwargs.get("user_id")
|
|
|
|
if not data:
|
|
return {"error": "No data provided for analysis"}
|
|
|
|
# Perform analysis
|
|
result = await self.analyze(data, analysis_type)
|
|
|
|
return result
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error handling analysis request: {e}")
|
|
return {"error": str(e), "success": False}
|
|
|
|
async def _perform_multi_source_search(
|
|
self, query: str, context: Dict[str, Any]
|
|
) -> List[Dict[str, Any]]:
|
|
"""Perform search across multiple sources"""
|
|
try:
|
|
search_sources = [
|
|
self._search_web,
|
|
self._search_knowledge_base,
|
|
self._search_memory_system,
|
|
]
|
|
|
|
# Execute searches concurrently
|
|
search_tasks = [source(query, context) for source in search_sources]
|
|
source_results = await asyncio.gather(*search_tasks, return_exceptions=True)
|
|
|
|
# Combine and clean results
|
|
all_results = []
|
|
for i, results in enumerate(source_results):
|
|
if isinstance(results, Exception):
|
|
logger.error(f"Search source {i} failed: {results}")
|
|
continue
|
|
|
|
if isinstance(results, list):
|
|
all_results.extend(results)
|
|
|
|
# Remove duplicates and rank by relevance
|
|
deduplicated = self._deduplicate_results(all_results)
|
|
ranked_results = self._rank_results(deduplicated, query)
|
|
|
|
return ranked_results[: self.max_search_results]
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error in multi-source search: {e}")
|
|
return []
|
|
|
|
async def _search_web(
|
|
self, query: str, context: Dict[str, Any]
|
|
) -> List[Dict[str, Any]]:
|
|
"""Search web sources (placeholder implementation)"""
|
|
try:
|
|
# This would integrate with actual web search APIs
|
|
# For demonstration, returning mock results
|
|
return [
|
|
{
|
|
"title": f'Web Result for "{query}"',
|
|
"url": "https://example.com/article1",
|
|
"snippet": f"This is a comprehensive article about {query}...",
|
|
"source": "web",
|
|
"relevance": 0.9,
|
|
"date": datetime.utcnow().isoformat(),
|
|
"type": "article",
|
|
},
|
|
{
|
|
"title": f"Research Paper: {query}",
|
|
"url": "https://academic.example.com/paper1",
|
|
"snippet": f"Academic research on {query} shows...",
|
|
"source": "academic",
|
|
"relevance": 0.95,
|
|
"date": (datetime.utcnow() - timedelta(days=30)).isoformat(),
|
|
"type": "paper",
|
|
},
|
|
]
|
|
except Exception as e:
|
|
logger.error(f"Web search error: {e}")
|
|
return []
|
|
|
|
async def _search_knowledge_base(
|
|
self, query: str, context: Dict[str, Any]
|
|
) -> List[Dict[str, Any]]:
|
|
"""Search internal knowledge base"""
|
|
try:
|
|
# Search memory system for relevant information
|
|
if self.memory_manager:
|
|
memories = await self.memory_manager.search_memories(query, limit=5)
|
|
|
|
results = []
|
|
for memory in memories:
|
|
results.append(
|
|
{
|
|
"title": f'Internal Knowledge: {memory.get("title", "Untitled")}',
|
|
"content": memory.get("content", ""),
|
|
"source": "knowledge_base",
|
|
"relevance": memory.get("similarity", 0.8),
|
|
"date": memory.get(
|
|
"timestamp", datetime.utcnow().isoformat()
|
|
),
|
|
"type": "internal",
|
|
}
|
|
)
|
|
|
|
return results
|
|
|
|
return []
|
|
except Exception as e:
|
|
logger.error(f"Knowledge base search error: {e}")
|
|
return []
|
|
|
|
async def _search_memory_system(
|
|
self, query: str, context: Dict[str, Any]
|
|
) -> List[Dict[str, Any]]:
|
|
"""Search conversation and interaction memory"""
|
|
try:
|
|
# Search for relevant past conversations and interactions
|
|
user_id = context.get("user_id")
|
|
if user_id and self.memory_manager:
|
|
user_memories = await self.memory_manager.get_user_memories(
|
|
user_id, query
|
|
)
|
|
|
|
results = []
|
|
for memory in user_memories:
|
|
results.append(
|
|
{
|
|
"title": "Previous Conversation",
|
|
"content": memory.get("summary", ""),
|
|
"source": "memory",
|
|
"relevance": memory.get("relevance", 0.7),
|
|
"date": memory.get("timestamp"),
|
|
"type": "conversation",
|
|
}
|
|
)
|
|
|
|
return results
|
|
|
|
return []
|
|
except Exception as e:
|
|
logger.error(f"Memory search error: {e}")
|
|
return []
|
|
|
|
async def _synthesize_results(
|
|
self, query: str, results: List[Dict[str, Any]]
|
|
) -> Dict[str, Any]:
|
|
"""Synthesize search results into coherent summary"""
|
|
try:
|
|
if not results:
|
|
return {
|
|
"summary": "No relevant information found.",
|
|
"key_points": [],
|
|
"confidence": 0.0,
|
|
}
|
|
|
|
# Use AI to synthesize information
|
|
synthesis_prompt = f"""
|
|
Based on the following search results for "{query}", provide a comprehensive synthesis:
|
|
|
|
Results:
|
|
{json.dumps([{k: v for k, v in r.items() if k in ['title', 'snippet', 'content']} for r in results[:5]], indent=2)}
|
|
|
|
Provide:
|
|
1. A concise summary (2-3 paragraphs)
|
|
2. Key points (bullet list)
|
|
3. Different perspectives if any
|
|
4. Reliability assessment
|
|
"""
|
|
|
|
ai_response = await self.ai_manager.generate_text(
|
|
synthesis_prompt, provider="openai", model="gpt-4", max_tokens=800
|
|
)
|
|
|
|
# Parse AI response (simplified)
|
|
return {
|
|
"summary": ai_response.get("content", "Unable to generate synthesis"),
|
|
"key_points": self._extract_key_points(results),
|
|
"perspectives": self._identify_perspectives(results),
|
|
"confidence": self._calculate_synthesis_confidence(results),
|
|
}
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error synthesizing results: {e}")
|
|
return {
|
|
"summary": "Error generating synthesis",
|
|
"key_points": [],
|
|
"confidence": 0.0,
|
|
}
|
|
|
|
async def _generate_citations(self, results: List[Dict[str, Any]]) -> List[str]:
|
|
"""Generate properly formatted citations"""
|
|
citations = []
|
|
|
|
for i, result in enumerate(results[:5], 1):
|
|
try:
|
|
if self.citation_style == "apa":
|
|
citation = self._format_apa_citation(result, i)
|
|
else:
|
|
citation = self._format_basic_citation(result, i)
|
|
|
|
citations.append(citation)
|
|
except Exception as e:
|
|
logger.error(f"Error formatting citation: {e}")
|
|
|
|
return citations
|
|
|
|
def _format_apa_citation(self, result: Dict[str, Any], index: int) -> str:
|
|
"""Format citation in APA style"""
|
|
title = result.get("title", "Untitled")
|
|
url = result.get("url", "")
|
|
date = result.get("date", datetime.utcnow().isoformat())
|
|
|
|
# Simplified APA format
|
|
return f"[{index}] {title}. Retrieved {date[:10]} from {url}"
|
|
|
|
def _format_basic_citation(self, result: Dict[str, Any], index: int) -> str:
|
|
"""Format basic citation"""
|
|
title = result.get("title", "Untitled")
|
|
source = result.get("source", "Unknown")
|
|
return f"[{index}] {title} ({source})"
|
|
|
|
async def _generate_follow_up_questions(
|
|
self, original_query: str, synthesis: Dict[str, Any]
|
|
) -> List[str]:
|
|
"""Generate relevant follow-up questions"""
|
|
try:
|
|
# Generate intelligent follow-up questions
|
|
return [
|
|
f"What are the latest developments in {original_query}?",
|
|
f"What are the main challenges related to {original_query}?",
|
|
f"How does {original_query} compare to similar topics?",
|
|
f"What are expert opinions on {original_query}?",
|
|
]
|
|
except Exception as e:
|
|
logger.error(f"Error generating follow-up questions: {e}")
|
|
return []
|
|
|
|
def _deduplicate_results(
|
|
self, results: List[Dict[str, Any]]
|
|
) -> List[Dict[str, Any]]:
|
|
"""Remove duplicate results"""
|
|
seen_titles = set()
|
|
unique_results = []
|
|
|
|
for result in results:
|
|
title = result.get("title", "").lower()
|
|
if title not in seen_titles:
|
|
seen_titles.add(title)
|
|
unique_results.append(result)
|
|
|
|
return unique_results
|
|
|
|
def _rank_results(
|
|
self, results: List[Dict[str, Any]], query: str
|
|
) -> List[Dict[str, Any]]:
|
|
"""Rank results by relevance"""
|
|
|
|
# Simple ranking by relevance score and source type
|
|
def ranking_key(result):
|
|
relevance = result.get("relevance", 0.5)
|
|
source_weight = {
|
|
"academic": 1.0,
|
|
"web": 0.8,
|
|
"knowledge_base": 0.9,
|
|
"memory": 0.6,
|
|
}.get(result.get("source", "web"), 0.5)
|
|
|
|
return relevance * source_weight
|
|
|
|
return sorted(results, key=ranking_key, reverse=True)
|
|
|
|
def _calculate_confidence(self, results: List[Dict[str, Any]]) -> float:
|
|
"""Calculate overall confidence in search results"""
|
|
if not results:
|
|
return 0.0
|
|
|
|
# Factor in number of sources, relevance scores, and source diversity
|
|
avg_relevance = sum(r.get("relevance", 0.5) for r in results) / len(results)
|
|
source_diversity = (
|
|
len(set(r.get("source", "unknown") for r in results)) / 4.0
|
|
) # Max 4 source types
|
|
result_count_factor = min(len(results) / 10.0, 1.0) # Up to 10 results
|
|
|
|
return min((avg_relevance + source_diversity + result_count_factor) / 3.0, 1.0)
|
|
|
|
def _extract_key_points(self, results: List[Dict[str, Any]]) -> List[str]:
|
|
"""Extract key points from results"""
|
|
key_points = []
|
|
for result in results[:3]: # Top 3 results
|
|
content = result.get("snippet", "") or result.get("content", "")
|
|
if content:
|
|
# Simplified key point extraction
|
|
key_points.append(
|
|
content[:200] + "..." if len(content) > 200 else content
|
|
)
|
|
|
|
return key_points
|
|
|
|
def _identify_perspectives(self, results: List[Dict[str, Any]]) -> List[str]:
|
|
"""Identify different perspectives in results"""
|
|
# Simplified perspective identification
|
|
perspectives = []
|
|
source_types = set(r.get("source", "unknown") for r in results)
|
|
|
|
for source_type in source_types:
|
|
perspectives.append(f"{source_type.title()} perspective")
|
|
|
|
return perspectives
|
|
|
|
def _calculate_synthesis_confidence(self, results: List[Dict[str, Any]]) -> float:
|
|
"""Calculate confidence in synthesis quality"""
|
|
return min(len(results) / 5.0, 1.0) # Higher confidence with more sources
|
|
|
|
async def _analyze_sentiment(self, data: Any) -> Dict[str, Any]:
|
|
"""Analyze sentiment of data"""
|
|
# Placeholder implementation
|
|
return {
|
|
"sentiment": "neutral",
|
|
"confidence": 0.8,
|
|
"details": "Sentiment analysis not fully implemented",
|
|
}
|
|
|
|
async def _analyze_trends(self, data: Any) -> Dict[str, Any]:
|
|
"""Analyze trends in data"""
|
|
# Placeholder implementation
|
|
return {"trends": ["stable"], "confidence": 0.7, "timeframe": "30 days"}
|
|
|
|
async def _summarize_content(self, data: Any) -> Dict[str, Any]:
|
|
"""Summarize content"""
|
|
# Use AI to summarize
|
|
if isinstance(data, str) and len(data) > 500:
|
|
summary_prompt = (
|
|
f"Summarize this content in 2-3 sentences:\n\n{data[:2000]}"
|
|
)
|
|
|
|
try:
|
|
result = await self.ai_manager.generate_text(
|
|
summary_prompt,
|
|
provider="openai",
|
|
model="gpt-3.5-turbo",
|
|
max_tokens=200,
|
|
)
|
|
return {
|
|
"summary": result.get("content", "Unable to generate summary"),
|
|
"confidence": 0.9,
|
|
}
|
|
except Exception as e:
|
|
logger.error(f"Summarization error: {e}")
|
|
|
|
return {
|
|
"summary": str(data)[:300] + "..." if len(str(data)) > 300 else str(data),
|
|
"confidence": 0.6,
|
|
}
|
|
|
|
async def _compare_sources(self, data: Any) -> Dict[str, Any]:
|
|
"""Compare multiple sources"""
|
|
# Placeholder implementation
|
|
return {
|
|
"comparison": "Source comparison not fully implemented",
|
|
"confidence": 0.5,
|
|
}
|
|
|
|
async def _fact_check(self, data: Any) -> Dict[str, Any]:
|
|
"""Perform fact checking"""
|
|
# Placeholder implementation
|
|
return {
|
|
"fact_check_result": "indeterminate",
|
|
"confidence": 0.5,
|
|
"notes": "Fact checking requires external verification services",
|
|
}
|
|
|
|
async def _bias_analysis(self, data: Any) -> Dict[str, Any]:
|
|
"""Analyze potential bias"""
|
|
# Placeholder implementation
|
|
return {
|
|
"bias_detected": False,
|
|
"confidence": 0.6,
|
|
"analysis": "Bias analysis not fully implemented",
|
|
}
|
|
|
|
def _get_analysis_methodology(self, analysis_type: str) -> str:
|
|
"""Get methodology description for analysis type"""
|
|
methodologies = {
|
|
"sentiment": "Natural language processing with machine learning sentiment classification",
|
|
"trends": "Statistical analysis of data patterns over time",
|
|
"summarize": "AI-powered text summarization using transformer models",
|
|
"compare": "Comparative analysis using similarity metrics",
|
|
"fact_check": "Cross-reference verification with trusted sources",
|
|
"bias_check": "Multi-dimensional bias detection using linguistic analysis",
|
|
}
|
|
|
|
return methodologies.get(analysis_type, "Standard analytical methodology")
|
|
|
|
async def _update_research_session(
|
|
self, user_id: int, session_id: str, result: Dict[str, Any]
|
|
):
|
|
"""Update research session tracking"""
|
|
try:
|
|
if user_id not in self.active_sessions:
|
|
self.active_sessions[user_id] = {}
|
|
|
|
self.active_sessions[user_id][session_id] = {
|
|
"timestamp": datetime.utcnow().isoformat(),
|
|
"query": result["query"],
|
|
"result_summary": result.get("synthesis", {}).get("summary", ""),
|
|
"sources_count": result.get("sources_searched", 0),
|
|
"confidence": result.get("confidence", 0.0),
|
|
}
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error updating research session: {e}")
|
|
|
|
async def _format_research_response(self, result: Dict[str, Any]) -> str:
|
|
"""Format research result for user presentation"""
|
|
try:
|
|
query = result.get("query", "Unknown query")
|
|
synthesis = result.get("synthesis", {})
|
|
summary = synthesis.get("summary", "No summary available")
|
|
confidence = result.get("confidence", 0.0)
|
|
sources_count = result.get("sources_searched", 0)
|
|
|
|
response = f"**Research Results for: {query}**\n\n"
|
|
response += f"{summary}\n\n"
|
|
response += (
|
|
f"*Searched {sources_count} sources with {confidence:.1%} confidence*"
|
|
)
|
|
|
|
# Add follow-up suggestions
|
|
follow_ups = result.get("follow_up_suggestions", [])
|
|
if follow_ups:
|
|
response += "\n\n**Follow-up questions:**\n"
|
|
for i, question in enumerate(follow_ups[:3], 1):
|
|
response += f"{i}. {question}\n"
|
|
|
|
return response
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error formatting response: {e}")
|
|
return "Error formatting research results"
|
|
|
|
|
|
# Plugin entry point — presumably the plugin manager imports this module and
# resolves the module-level `main` name to discover the plugin class
# (TODO confirm against extensions.plugin_manager's loading convention).
main = AdvancedResearchAgent
|