Last active
November 25, 2024 21:01
-
-
Save eterps/b91dfd3555b13b032667382ab88246de to your computer and use it in GitHub Desktop.
Polislite algorithm (inspired by pol.is)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Consensus Statements: | |
- Climate change requires immediate action (strong agreement) | |
Divisive Statements: | |
- Nuclear power is necessary for clean energy | |
- Carbon tax should be implemented globally | |
- Individual actions matter for sustainability | |
- Companies should be held liable for emissions | |
Group Positions: | |
Group 1 characteristics: | |
- strongly agrees with: Climate change requires immediate action | |
- strongly agrees with: Nuclear power is necessary for clean energy | |
- strongly disagrees with: Carbon tax should be implemented globally | |
- strongly disagrees with: Individual actions matter for sustainability | |
- strongly disagrees with: Companies should be held liable for emissions | |
Group 2 characteristics: | |
- strongly agrees with: Climate change requires immediate action | |
- strongly agrees with: Nuclear power is necessary for clean energy | |
- strongly agrees with: Carbon tax should be implemented globally | |
- strongly disagrees with: Individual actions matter for sustainability | |
- strongly agrees with: Companies should be held liable for emissions | |
Group 3 characteristics: | |
- strongly agrees with: Climate change requires immediate action | |
- strongly disagrees with: Nuclear power is necessary for clean energy | |
- strongly agrees with: Carbon tax should be implemented globally | |
- strongly agrees with: Individual actions matter for sustainability | |
- strongly agrees with: Companies should be held liable for emissions |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Moved to: https://github.com/eterps/polislite
import numpy as np | |
from sklearn.decomposition import PCA | |
from scipy.cluster import hierarchy | |
from sklearn.metrics import silhouette_score | |
from collections import defaultdict | |
class PolisClusterer:
    """Group voters by opinion and print a consensus / divisive / group report.

    Votes are encoded numerically ('agree' = 1, 'disagree' = -1, anything
    else = 0), projected onto two principal components, and clustered with
    Ward hierarchical clustering.  The number of clusters is chosen by a
    weighted score of silhouette, group-size balance and centre separation.
    """

    def __init__(self, min_clusters=2, max_clusters=6):
        """Create the 2-component PCA model and store the cluster-count range.

        Args:
            min_clusters: smallest number of clusters to try.
            max_clusters: largest number of clusters to try (further capped
                at ``number of voters - 1`` when clustering).
        """
        self.pca = PCA(n_components=2)
        self.min_clusters = min_clusters
        self.max_clusters = max_clusters

    def analyze_opinions(self, votes, statements):
        """Cluster voters and print a report of their positions.

        Args:
            votes: one sequence per voter holding one entry per statement;
                'agree' and 'disagree' are recognised, every other value is
                treated as a missing vote.
            statements: statement texts, in the same order as the vote columns.

        Returns:
            A ``(points_2d, clusters)`` tuple: the PCA projection of every
            voter and the cluster label assigned to every voter.

        Raises:
            ValueError: if ``votes`` does not form a two-dimensional table.
        """
        # Float dtype is required: imputed values are fractional and would be
        # truncated to 0 if the matrix were integer-typed.
        vote_matrix = np.array(
            [
                [1 if v == 'agree' else -1 if v == 'disagree' else 0
                 for v in voter_votes]
                for voter_votes in votes
            ],
            dtype=float,
        )
        if vote_matrix.ndim != 2:
            raise ValueError(
                'votes must be a non-empty list of equally long vote lists'
            )

        self._handle_sparse_votes(vote_matrix)
        points_2d = self._compute_pca(vote_matrix)
        clusters = self._find_optimal_clusters(points_2d)
        self._generate_report(vote_matrix, clusters, statements)
        return points_2d, clusters

    def _handle_sparse_votes(self, matrix):
        """Replace missing votes (zeros) by the voter's mean cast vote, in place.

        The mean is taken over the non-zero entries of each row only, so the
        missing votes themselves do not dilute it.  Rows without any cast vote
        are left at 0.

        Args:
            matrix: 2-D vote array, modified in place.  It should have a float
                dtype; in an integer array the fractional means are truncated
                on assignment.
        """
        missing = matrix == 0
        cast_counts = (~missing).sum(axis=1)
        totals = matrix.sum(axis=1)  # zeros add nothing to the row total
        row_means = np.divide(
            totals,
            cast_counts,
            out=np.zeros(len(matrix), dtype=float),
            where=cast_counts > 0,  # avoid 0/0 for voters who never voted
        )
        fill_values = np.broadcast_to(row_means[:, np.newaxis], matrix.shape)
        matrix[missing] = fill_values[missing]

    def _compute_pca(self, matrix):
        """Fit the PCA model on ``matrix`` and return its 2-D projection.

        The matrix is handed over as a plain dense float array.
        NOTE(review): an earlier version wrapped it in a masked array first;
        scikit-learn appears to convert its input to a plain ndarray, which
        would make that mask ineffective -- confirm against the PCA docs.
        """
        return self.pca.fit_transform(np.asarray(matrix, dtype=float))

    def _compute_pattern_difference(self, clusters, points):
        """Return the mean Euclidean distance between all pairs of cluster centres.

        Args:
            clusters: cluster label of every point.
            points: coordinates of every point, aligned with ``clusters``.

        Returns:
            The average pairwise centre distance, or 0.0 when there are fewer
            than two clusters.
        """
        labels = np.asarray(clusters)
        coords = np.asarray(points, dtype=float)
        centres = [coords[labels == label].mean(axis=0)
                   for label in np.unique(labels)]
        distances = [
            np.linalg.norm(first - second)
            for idx, first in enumerate(centres)
            for second in centres[idx + 1:]
        ]
        return float(np.mean(distances)) if distances else 0.0

    @staticmethod
    def _size_balance(labels):
        """Return smallest-cluster size divided by largest-cluster size.

        Sizes are counted per label that actually occurs.  Counting with
        ``np.bincount`` would add an always-empty bin 0 for labels that start
        at 1, forcing the ratio to 0.
        """
        _, sizes = np.unique(labels, return_counts=True)
        return sizes.min() / sizes.max()

    def _find_optimal_clusters(self, points):
        """Pick the cluster count with the best combined score and return labels.

        Every cluster count from ``min_clusters`` up to
        ``min(max_clusters, len(points) - 1)`` is scored as
        ``0.4 * silhouette + 0.3 * size balance + 0.3 * centre separation``.

        Args:
            points: 2-D coordinates, one row per voter.

        Returns:
            An array with one cluster label per point.  When there are too few
            points to evaluate any candidate, all points share label 1.
        """
        points = np.asarray(points, dtype=float)
        n_points = len(points)
        if n_points < 2:
            # Linkage needs at least two observations.
            return np.ones(n_points, dtype=int)

        linkage = hierarchy.linkage(points, method='ward')
        max_clusters = min(self.max_clusters, n_points - 1)

        candidates = []
        scores = []
        for n in range(self.min_clusters, max_clusters + 1):
            labels = hierarchy.fcluster(linkage, t=n, criterion='maxclust')
            # Silhouette is undefined for a single cluster; penalise instead.
            if len(np.unique(labels)) > 1:
                silhouette = silhouette_score(points, labels)
            else:
                silhouette = -1
            size_balance = self._size_balance(labels)
            # NOTE(review): this is a raw distance, not normalised to [0, 1]
            # like the other two terms -- confirm the weighting is intended.
            pattern_diff = self._compute_pattern_difference(labels, points)
            scores.append(
                silhouette * 0.4 + size_balance * 0.3 + pattern_diff * 0.3
            )
            candidates.append(labels)

        if not candidates:
            # Empty search range (e.g. only two voters): one common group.
            return hierarchy.fcluster(linkage, t=1, criterion='maxclust')
        return candidates[int(np.argmax(scores))]

    def _build_report(self, vote_matrix, clusters, statements):
        """Return the report as a list of lines (some start with a newline).

        A statement whose vote standard deviation is below 0.5 is listed as a
        consensus statement, labelled by its mean vote: above 0.5 is strong
        agreement, below -0.5 is strong disagreement, anything in between is
        reported as having no strong position.  All other statements are
        listed as divisive.  For every cluster, statements whose mean vote
        within the cluster exceeds 0.5 in magnitude are listed.
        """
        vote_matrix = np.asarray(vote_matrix, dtype=float)
        labels = np.asarray(clusters)
        statement_scores = vote_matrix.mean(axis=0)
        agreement_levels = vote_matrix.std(axis=0)

        lines = ['Consensus Statements:']
        for stmt, score, spread in zip(statements, statement_scores,
                                       agreement_levels):
            if spread < 0.5:
                if score > 0.5:
                    consensus = 'strong agreement'
                elif score < -0.5:
                    consensus = 'strong disagreement'
                else:
                    # Low spread around zero is not disagreement.
                    consensus = 'no strong position'
                lines.append(f'- {stmt} ({consensus})')

        lines.append('\nDivisive Statements:')
        for stmt, spread in zip(statements, agreement_levels):
            if spread >= 0.5:
                lines.append(f'- {stmt}')

        lines.append('\nGroup Positions:')
        for grp_id in np.unique(labels):
            opinions = vote_matrix[labels == grp_id].mean(axis=0)
            lines.append(f'\nGroup {grp_id} characteristics:')
            for stmt, opinion in zip(statements, opinions):
                if abs(opinion) > 0.5:
                    if opinion > 0:
                        stance = 'strongly agrees with'
                    else:
                        stance = 'strongly disagrees with'
                    lines.append(f'- {stance}: {stmt}')
        return lines

    def _generate_report(self, vote_matrix, clusters, statements):
        """Print the consensus, divisive and per-group report to stdout."""
        print('\n'.join(self._build_report(vote_matrix, clusters, statements)))
# Example usage: five climate-policy statements voted on by nine people.
statements = [
    'Climate change requires immediate action',
    'Nuclear power is necessary for clean energy',
    'Carbon tax should be implemented globally',
    'Individual actions matter for sustainability',
    'Companies should be held liable for emissions',
]

# Each of the three ballots below is cast by three voters in a row, so the
# resulting table has nine rows in the order group 1, group 2, group 3.
votes = [
    list(ballot)
    for ballot in (
        # Group 1: Environmental purists (anti-nuclear)
        ('agree', 'disagree', 'agree', 'agree', 'agree'),
        # Group 2: Tech-focused environmentalists (pro-nuclear)
        ('agree', 'agree', 'agree', 'disagree', 'agree'),
        # Group 3: Business-oriented (anti-regulation)
        ('agree', 'agree', 'disagree', 'disagree', 'disagree'),
    )
    for _ in range(3)
]

# Run the analysis; the report is printed as a side effect.
clusterer = PolisClusterer()
points, clusters = clusterer.analyze_opinions(votes, statements)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment