Skip to content

Instantly share code, notes, and snippets.

@aliirz
Created December 6, 2024 20:57
Show Gist options
  • Save aliirz/e1ac15ac0bd8b7472fb60107fd0c57bb to your computer and use it in GitHub Desktop.
Save aliirz/e1ac15ac0bd8b7472fb60107fd0c57bb to your computer and use it in GitHub Desktop.
k commons
from fastapi import FastAPI, HTTPException
from transformers import GemmaForCausalLM, GemmaTokenizer
import torch
from typing import Dict, List
import sqlite3
import asyncio
import json
from datetime import datetime
class LocalGemmaProcessor:
    """Run a locally loaded Gemma causal language model over academic text.

    The tokenizer and model are loaded once in ``__init__`` and reused by
    every call to ``process_academic_text``.
    """

    # Prompt template per supported task; ``{text}`` is replaced with the
    # document being processed.
    PROMPT_TEMPLATES = {
        "summarize": "Summarize this academic text while preserving key technical details:\n{text}",
        "simplify": "Explain this academic text in simpler terms while keeping accuracy:\n{text}",
        "extract_key_points": "Extract and list the main findings from this academic text:\n{text}",
    }

    def __init__(self, model_size: str = "2b"):
        """Initialize Gemma model for local processing

        Args:
            model_size: '2b' or '7b' for different Gemma model sizes
        """
        self.model_name = f"google/gemma-{model_size}"
        self.tokenizer = GemmaTokenizer.from_pretrained(self.model_name)
        self.model = GemmaForCausalLM.from_pretrained(
            self.model_name,
            device_map="auto",  # Use GPU if available
            torch_dtype=torch.float16,  # Use half precision for memory efficiency
        )

    async def process_academic_text(
        self, content: str, task: str, *, max_new_tokens: int = 1024
    ) -> Dict:
        """Process academic content using Gemma

        Args:
            content: The academic text to process
            task: 'summarize', 'simplify', or 'extract_key_points'
            max_new_tokens: Upper bound on the number of tokens generated
                in addition to the prompt.

        Returns:
            A dict with ``processed_text`` (the generated text only) and a
            ``metadata`` dict describing the model, task and timestamp.

        Raises:
            ValueError: If ``task`` is not one of the supported task names.
        """
        # Validate before touching the model so a bad request fails fast
        # with a readable message instead of a bare KeyError.
        template = self.PROMPT_TEMPLATES.get(task)
        if template is None:
            supported = ", ".join(sorted(self.PROMPT_TEMPLATES))
            raise ValueError(
                f"Unsupported task {task!r}; expected one of: {supported}"
            )

        prompt = template.format(text=content)
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        prompt_length = inputs["input_ids"].shape[-1]

        # NOTE: generate() is a blocking call; while it runs, this coroutine
        # does not yield control.
        outputs = self.model.generate(
            **inputs,
            # Budget the generated tokens only. ``max_length`` would also
            # count the prompt, leaving little or no room for output when
            # the paper itself is long.
            max_new_tokens=max_new_tokens,
            temperature=0.3,
            do_sample=True,
        )

        # A causal LM returns prompt + continuation; drop the prompt tokens
        # so the result holds only the newly generated text.
        generated_tokens = outputs[0][prompt_length:]
        processed_text = self.tokenizer.decode(
            generated_tokens, skip_special_tokens=True
        )

        return {
            "processed_text": processed_text,
            "metadata": {
                "model": "Gemma",
                # The key is named "model_size" but carries the full model id.
                "model_size": self.model_name,
                "task": task,
                "timestamp": datetime.now().isoformat(),
                "processing_type": "local_ai",
            },
        }
class KnowledgeNode:
    """A single node: a local SQLite store paired with a Gemma processor."""

    def __init__(self, node_id: str, region: str = "PK"):
        """Open the node's database, load the AI processor, and create tables.

        Args:
            node_id: Node identifier; the database file is ``<node_id>.db``.
            region: Region code recorded on the node.
        """
        self.node_id, self.region = node_id, region
        database_file = f"{node_id}.db"
        self.db = sqlite3.connect(database_file)
        self.ai_processor = LocalGemmaProcessor()
        self.setup_db()

    def setup_db(self):
        """Create the ``papers`` and ``community_feedback`` tables if absent."""
        papers_ddl = "\n".join(
            (
                "",
                "CREATE TABLE IF NOT EXISTS papers (",
                "hash TEXT PRIMARY KEY,",
                "title TEXT,",
                "content TEXT,",
                "language TEXT,",
                "processed_versions TEXT,",
                "metadata TEXT,",
                "timestamp DATETIME,",
                "license TEXT",
                ")",
                "",
            )
        )
        feedback_ddl = "\n".join(
            (
                "",
                "CREATE TABLE IF NOT EXISTS community_feedback (",
                "paper_hash TEXT,",
                "processor_version TEXT,",
                "feedback_text TEXT,",
                "quality_rating INTEGER,",
                "timestamp DATETIME,",
                "FOREIGN KEY(paper_hash) REFERENCES papers(hash)",
                ")",
                "",
            )
        )
        # papers first: community_feedback declares a foreign key onto it.
        for statement in (papers_ddl, feedback_ddl):
            self.db.execute(statement)
        self.db.commit()
# Module-level singletons shared by the route handlers below.
# NOTE: building the node happens at import time; it opens
# "karachi_demo_node.db" and constructs the node's Gemma processor.
app = FastAPI(title="Knowledge Commons Node (Gemma-powered)")
node = KnowledgeNode(node_id="karachi_demo_node")
@app.post("/papers/process")
async def process_paper(paper_hash: str, task: str):
    """Process a stored paper with the local Gemma model and persist the result.

    Args:
        paper_hash: Value of the ``hash`` column identifying the paper.
        task: Task name forwarded to the node's AI processor.

    Returns:
        The dict returned by the processor for this paper and task.

    Raises:
        HTTPException: 404 if no paper has ``paper_hash``; 400 if processing
            or storing the result fails.
    """
    try:
        row = node.db.execute(
            "SELECT content FROM papers WHERE hash = ?",
            (paper_hash,),
        ).fetchone()
        if row is None:
            raise HTTPException(status_code=404, detail="Paper not found")

        processed = await node.ai_processor.process_academic_text(row[0], task)

        # Store processed version.
        # - WHERE restricts the write to the requested paper; without it the
        #   UPDATE rewrote processed_versions for every row in the table.
        # - The JSON path is a bound parameter, so the request-supplied task
        #   never becomes part of the SQL text.
        # - json(?) stores the payload as a nested JSON object rather than a
        #   quoted string.
        # - json_set overwrites an earlier result for the same task, so the
        #   stored value matches what this call returns.
        node.db.execute(
            "UPDATE papers "
            "SET processed_versions = "
            "json_set(COALESCE(processed_versions, '{}'), ?, json(?)) "
            "WHERE hash = ?",
            (f"$.{task}", json.dumps(processed), paper_hash),
        )
        node.db.commit()
        return processed
    except HTTPException:
        # Let deliberate HTTP errors (the 404 above) through unchanged
        # instead of rewrapping them as a 400.
        raise
    except Exception as e:
        # Discard any partially applied write before reporting the failure.
        node.db.rollback()
        raise HTTPException(status_code=400, detail=str(e)) from e
# Demo usage
if __name__ == "__main__":
    print("Initializing Knowledge Commons Node with Gemma...")

    # Sample paper for the demo; it is defined here but not processed.
    paper_body = "\n".join(
        (
            "",
            "This research examines the implementation of community-operated networks",
            "in South Asian regions with limited internet access. Our findings show",
            "that locally-managed infrastructure can significantly improve digital",
            "rights and knowledge access in underserved communities.",
            "Key findings include:",
            "1. 73% increase in educational resource access",
            "2. Significant improvement in local content creation",
            "3. Enhanced digital privacy awareness",
            "The study suggests that community ownership of digital infrastructure",
            "leads to more sustainable and equitable access to knowledge.",
            "",
        )
    )
    demo_paper = dict(
        title="Community Networks and Digital Rights in South Asia",
        content=paper_body,
        language="en",
        license="CC-BY-SA-4.0",
    )

    # Demo different processing tasks
    tasks = ["summarize", "simplify", "extract_key_points"]

    for message in (
        "\nRunning demo with Gemma model...",
        "This would typically process the paper in multiple ways...",
        "All processing happens locally on the node...",
    ):
        print(message)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment