IntelliCast/topic_classifier.py at main · EhteshamSid/IntelliCast · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
from typing import Dict, List, Tuple, Any
import re
from settings import Config

class PoliticalClassifier:
    """Classifies whether a query is political or not.

    Classification is purely keyword based. A query is first screened
    against ``NON_POLITICAL_KEYWORDS``; if none of those are present, the
    political keywords it contains are counted and turned into a
    confidence score.
    """

    POLITICAL_KEYWORDS = {
        'elections': ['election', 'vote', 'voting', 'ballot', 'campaign', 'candidate', 'primary', 'general election'],
        'government': ['congress', 'senate', 'house', 'president', 'governor', 'mayor', 'legislature', 'government'],
        'policies': ['policy', 'bill', 'law', 'legislation', 'act', 'regulation', 'executive order'],
        'parties': ['republican', 'democrat', 'democratic', 'gop', 'party', 'political party'],
        'issues': ['immigration', 'healthcare', 'economy', 'tax', 'budget', 'foreign policy', 'defense', 'debt ceiling'],
        'events': ['debate', 'rally', 'protest', 'hearing', 'committee', 'session', 'inauguration'],
        'officials': ['politician', 'representative', 'senator', 'congressman', 'congresswoman', 'official']
    }

    NON_POLITICAL_KEYWORDS = [
        'recipe', 'cooking', 'food', 'sports', 'entertainment', 'movie', 'music',
        'science', 'technology', 'health', 'medical', 'weather', 'travel', 'shopping',
        'fashion', 'beauty', 'gaming', 'video game', 'anime', 'manga', 'fiction'
    ]

    @staticmethod
    def _has_whole_word(keyword: str, text: str) -> bool:
        """Return True if *keyword* appears in *text* as a whole word.

        A trailing plural ``s``/``es`` is tolerated, so ``'movie'`` still
        matches ``'movies'``, but ``'health'`` does not match
        ``'healthcare'``.
        """
        return re.search(rf"\b{re.escape(keyword)}(?:s|es)?\b", text) is not None

    @staticmethod
    def _has_word_start(keyword: str, text: str) -> bool:
        """Return True if *keyword* appears in *text* at the start of a word.

        Anchoring only the left edge keeps inflected forms matching
        (``'vote'`` -> ``'voted'``, ``'president'`` -> ``'presidential'``)
        while rejecting hits buried inside unrelated words (``'act'`` in
        ``'reactions'``, ``'tax'`` in ``'syntax'``).
        """
        return re.search(rf"\b{re.escape(keyword)}", text) is not None

    def classify_query(self, query: str) -> Tuple[bool, float, str]:
        """
        Classify if a query is political

        Args:
            query: Free-text query; matching is case-insensitive.

        Returns: (is_political, confidence, reasoning)
            is_political is True when at least one political keyword is
            found and no non-political keyword is present. confidence is
            0.9 for a non-political keyword hit, otherwise the number of
            political keyword hits divided by 3 and capped at 1.0.
        """
        query_lower = query.lower()

        # Check for non-political keywords first. Whole-word matching is
        # required here: a plain substring test lets 'health' veto every
        # query about 'healthcare', which is itself a political keyword.
        for keyword in self.NON_POLITICAL_KEYWORDS:
            if self._has_whole_word(keyword, query_lower):
                return False, 0.9, f"Query contains non-political keyword: {keyword}"

        # Count political keywords, remembering each category only once.
        # NOTE: overlapping keywords (e.g. 'election' and 'general election')
        # each count as a separate hit, as before.
        political_score = 0
        matched_categories = []

        for category, keywords in self.POLITICAL_KEYWORDS.items():
            hits = sum(1 for keyword in keywords if self._has_word_start(keyword, query_lower))
            if hits:
                political_score += hits
                matched_categories.append(category)

        # Calculate confidence based on keyword matches
        confidence = min(political_score / 3.0, 1.0)  # Normalize to 0-1

        # Determine if political
        is_political = political_score >= 1

        reasoning = f"Found {political_score} political keywords in categories: {', '.join(matched_categories)}"

        return is_political, confidence, reasoning

class BiasDetector:
    """Detects bias in responses.

    Keyword lists are read from ``Config.BIAS_KEYWORDS``, which must provide
    the keys ``'partisan'``, ``'emotional'`` and ``'absolute'``.
    """

    # Keyword categories checked by detect_bias, in reporting order.
    _KEYWORD_BIAS_TYPES = ('partisan', 'emotional', 'absolute')

    # bias type -> suggestion text, in the order suggestions are emitted.
    _SUGGESTIONS = (
        ('partisan', "Replace partisan language with neutral terms"),
        ('emotional', "Use objective language instead of emotional terms"),
        ('absolute', "Qualify absolute statements with appropriate context"),
        ('one_sided_republican', "Include Democratic perspective to balance the response"),
        ('one_sided_democratic', "Include Republican perspective to balance the response"),
    )

    def __init__(self):
        self.bias_keywords = Config.BIAS_KEYWORDS

    def detect_bias(self, text: str) -> Dict[str, Any]:
        """
        Detect bias in text

        Args:
            text: Text to analyse; matching is a case-insensitive substring
                test against the lower-cased text.

        Returns: Dictionary with bias analysis
            'has_bias': True when at least one issue was found.
            'bias_types': each detected bias type, listed once.
            'biased_phrases': every keyword that matched.
            'confidence': issue count divided by 5, capped at 1.0. Each
                matched keyword counts as one issue; a one-sided
                perspective counts as two.

        Raises:
            KeyError: if ``self.bias_keywords`` lacks one of the expected keys.
        """
        text_lower = text.lower()
        bias_types: List[str] = []
        biased_phrases: List[str] = []
        total_issues = 0

        # Partisan / emotional / absolute language share the same check.
        # Each type is recorded once even when several of its keywords
        # match, so 'bias_types' never carries duplicate entries.
        for bias_type in self._KEYWORD_BIAS_TYPES:
            hits = [keyword for keyword in self.bias_keywords[bias_type] if keyword in text_lower]
            if hits:
                bias_types.append(bias_type)
                biased_phrases.extend(hits)
                total_issues += len(hits)

        # Check for one-sided perspective: one side is mentioned, the other is not.
        republican_indicators = ['republican', 'gop', 'conservative', 'right-wing']
        democratic_indicators = ['democrat', 'democratic', 'liberal', 'left-wing']

        mentions_republican = any(indicator in text_lower for indicator in republican_indicators)
        mentions_democratic = any(indicator in text_lower for indicator in democratic_indicators)

        if mentions_republican and not mentions_democratic:
            bias_types.append('one_sided_republican')
            total_issues += 2
        elif mentions_democratic and not mentions_republican:
            bias_types.append('one_sided_democratic')
            total_issues += 2

        return {
            'has_bias': total_issues > 0,
            'bias_types': bias_types,
            'biased_phrases': biased_phrases,
            'confidence': min(total_issues / 5.0, 1.0),  # Normalize to 0-1
        }

    def suggest_corrections(self, bias_analysis: Dict[str, Any]) -> List[str]:
        """Suggest corrections for detected bias

        Args:
            bias_analysis: Result of ``detect_bias``; only its
                ``'bias_types'`` entry is read.

        Returns: One suggestion string per detected bias type.
        """
        detected = bias_analysis['bias_types']
        return [message for bias_type, message in self._SUGGESTIONS if bias_type in detected]

class CitationChecker:
    """Checks for proper citations in responses"""

    # Regexes that count as a citation; each captures the cited source.
    # Compiled once, case-insensitively, and shared by counting and by the
    # per-sentence check.
    _CITATION_PATTERNS = tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r'\[([^\]]+)\]',  # [source]
            r'\(([^)]+)\)',   # (source)
            r'according to ([^,\.]+)',  # according to source
            r'as reported by ([^,\.]+)',  # as reported by source
            r'source: ([^,\.]+)',  # source: name
        )
    )

    # Words that mark a sentence as making a factual claim.
    _FACTUAL_INDICATORS = (
        'reported', 'announced', 'stated', 'said', 'confirmed', 'revealed',
        'passed', 'voted', 'elected', 'appointed', 'signed', 'enacted'
    )

    def check_citations(self, text: str) -> Dict[str, Any]:
        """
        Check if response has proper citations

        Args:
            text: Response text to analyse.

        Returns: Dictionary with citation analysis
            'has_citations': True when at least one citation was found.
            'citation_count': total matches across all citation patterns.
            'citation_sources': the captured source of every match.
            'missing_citations': stripped sentences (split on '.') that
                contain a factual indicator but no citation.
            'confidence': citation count divided by 3, capped at 1.0.
        """
        # Look for citation patterns across the whole text
        citation_sources: List[str] = []
        for pattern in self._CITATION_PATTERNS:
            citation_sources.extend(pattern.findall(text))
        citation_count = len(citation_sources)

        # Check for factual claims that need citations
        missing_citations: List[str] = []
        for sentence in text.split('.'):
            sentence_lower = sentence.lower()
            if not any(indicator in sentence_lower for indicator in self._FACTUAL_INDICATORS):
                continue
            # The sentence must actually be searched with the regexes; a
            # plain `pattern in sentence` test compares against the regex
            # source text and so never recognises a real citation.
            if not any(pattern.search(sentence) for pattern in self._CITATION_PATTERNS):
                missing_citations.append(sentence.strip())

        return {
            'has_citations': citation_count > 0,
            'citation_count': citation_count,
            'citation_sources': citation_sources,
            'missing_citations': missing_citations,
            'confidence': min(citation_count / 3.0, 1.0),
        }