Relational topic model by Chang and Blei (2009), with an added link-prediction function.
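The model couples an LDA-style topic model for each document with a link model over pairs of documents. This implementation scores links with the exponential link probability function, so the probability of a link between documents d and d' is

    p(y_{d,d'} = 1) = exp( eta . (pi_d * pi_d') + nu )

where pi_d is the count-weighted mean of document d's variational topic assignments. The added predict_edges function runs variational inference on held-out documents and returns this probability for every pair of them.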
import numpy as np
# import utils
from scipy.special import gammaln, psi
# from formatted_logger import formatted_logger

eps = 1e-20

# log = formatted_logger('RTM', 'info')
class rtm:
    """ Implementation of the relational topic model by Chang and Blei (2009),
    using the exponential link probability function.
    """
    def __init__(self, num_topic, num_doc, num_voca, doc_ids, doc_cnt, doc_links, rho):
        self.D = num_doc
        self.K = num_topic
        self.V = num_voca

        self.alpha = .1

        self.gamma = np.random.gamma(100., 1. / 100, [self.D, self.K])
        self.beta = np.random.dirichlet([5] * self.V, self.K)
        self.nu = 0
        self.eta = np.random.normal(0., 1, self.K)

        self.phi = list()
        self.pi = np.zeros([self.D, self.K])

        for di in range(self.D):
            unique_word = len(doc_ids[di])
            cnt = doc_cnt[di]
            self.phi.append(np.random.dirichlet([10] * self.K, unique_word).T)  # list of K x W matrices
            self.pi[di, :] = np.sum(cnt * self.phi[di], 1) / np.sum(cnt * self.phi[di])

        self.doc_ids = doc_ids
        self.doc_cnt = doc_cnt
        self.doc_links = doc_links
        self.rho = rho  # regularization parameter

        # log.info('Initialize RTM: num_voca:%d, num_topic:%d, num_doc:%d' % (self.V, self.K, self.D))
    def posterior_inference(self, max_iter, tol=10 ** -2):
        elbo_old = -10 ** 10
        for it in range(max_iter):
            self.variation_update()
            self.parameter_estimation()
            elbo = self.compute_elbo()
            # log.info('%d iter: ELBO = %.3f' % (it, elbo))
            if np.fabs(elbo - elbo_old) < tol:
                break
            elbo_old = elbo
    def compute_elbo(self):
        """ Compute the evidence lower bound of the trained model.
        """
        elbo = 0

        e_log_theta = psi(self.gamma) - psi(np.sum(self.gamma, 1))[:, np.newaxis]  # D x K
        log_beta = np.log(self.beta + eps)

        for di in range(self.D):
            words = self.doc_ids[di]
            cnt = self.doc_cnt[di]

            elbo += np.sum(cnt * (self.phi[di] * log_beta[:, words]))  # E_q[log p(w_{d,n}|\beta,z_{d,n})]
            elbo += np.sum((self.alpha - 1.) * e_log_theta[di, :])  # E_q[log p(\theta_d|\alpha)]
            elbo += np.sum(self.phi[di].T * e_log_theta[di, :])  # E_q[log p(z_{d,n}|\theta_d)]
            elbo += -gammaln(np.sum(self.gamma[di, :])) + np.sum(gammaln(self.gamma[di, :])) \
                - np.sum((self.gamma[di, :] - 1.) * e_log_theta[di, :])  # - E_q[log q(\theta|\gamma)]
            elbo += -np.sum(cnt * self.phi[di] * np.log(self.phi[di] + eps))  # - E_q[log q(z|\phi)]

            for adi in self.doc_links[di]:
                elbo += np.dot(self.eta, self.pi[di] * self.pi[adi]) + self.nu  # E_q[log p(y_{d1,d2}|z_{d1},z_{d2},\eta,\nu)]

        return elbo
    def variation_update(self):
        # update phi, gamma, and beta
        e_log_theta = psi(self.gamma) - psi(np.sum(self.gamma, 1))[:, np.newaxis]

        new_beta = np.zeros([self.K, self.V])

        for di in range(self.D):
            words = self.doc_ids[di]
            cnt = self.doc_cnt[di]
            doc_len = np.sum(cnt)

            new_phi = np.log(self.beta[:, words] + eps) + e_log_theta[di, :][:, np.newaxis]

            # contribution of linked documents to the phi update
            gradient = np.zeros(self.K)
            for adi in self.doc_links[di]:
                gradient += self.eta * self.pi[adi, :] / doc_len

            new_phi += gradient[:, np.newaxis]
            new_phi = np.exp(new_phi)
            new_phi = new_phi / np.sum(new_phi, 0)

            self.phi[di] = new_phi

            self.pi[di, :] = np.sum(cnt * self.phi[di], 1) / np.sum(cnt * self.phi[di])
            self.gamma[di, :] = np.sum(cnt * self.phi[di], 1) + self.alpha
            new_beta[:, words] += cnt * self.phi[di]

        self.beta = new_beta / np.sum(new_beta, 1)[:, np.newaxis]
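    # The M-step below uses the closed-form updates for eta and nu under the exponential
    # link function. Observed links contribute pi_d * pi_d' (accumulated in pi_sum), while
    # the rho * pi_alpha term acts as a regularizer, roughly playing the role of rho pseudo
    # non-links whose expected pi_d * pi_d' under the Dirichlet prior is 1/K^2 per topic
    # (cf. Chang and Blei, 2009).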
    def parameter_estimation(self):
        # update eta and nu
        pi_sum = np.zeros(self.K)

        num_links = 0.

        for di in range(self.D):
            for adi in self.doc_links[di]:
                pi_sum += self.pi[di, :] * self.pi[adi, :]
                num_links += 1

        num_links /= 2.  # divide by 2 for bidirectional edges
        pi_sum /= 2.

        pi_alpha = np.zeros(self.K) + self.alpha / (self.alpha * self.K) * self.alpha / (self.alpha * self.K)  # = 1/K^2 per topic

        self.nu = np.log(num_links - np.sum(pi_sum)) - np.log(self.rho * (self.K - 1) / self.K + num_links - np.sum(pi_sum))
        self.eta = np.log(pi_sum) - np.log(pi_sum + self.rho * pi_alpha) - self.nu
    def save_model(self, output_directory, vocab=None):
        import os
        if not os.path.exists(output_directory):
            os.mkdir(output_directory)

        np.savetxt(output_directory + '/eta.txt', self.eta, delimiter='\t')
        np.savetxt(output_directory + '/beta.txt', self.beta, delimiter='\t')
        np.savetxt(output_directory + '/gamma.txt', self.gamma, delimiter='\t')
        with open(output_directory + '/nu.txt', 'w') as f:
            f.write('%f\n' % self.nu)

        if vocab is not None:
            # requires the external `utils` helper from the original project (import commented out above)
            utils.write_top_words(self.beta, vocab, output_directory + '/top_words.csv')
    def predict_edges(self, doc_ids, doc_cnt, max_iter=100, tol=10 ** -2):
        """ Run variational inference on held-out documents (ignoring links),
        then return the predicted link probability for every pair of them.
        """
        D_te = len(doc_ids)
        gamma_te = np.random.gamma(100., 1. / 100, [D_te, self.K])
        phi_te = list()
        for di in range(D_te):
            unique_word = len(doc_ids[di])
            phi_te.append(np.random.dirichlet([10] * self.K, unique_word).T)

        elbo_old = -10 ** 20

        for it in range(max_iter):
            e_log_theta = psi(gamma_te) - psi(np.sum(gamma_te, 1))[:, np.newaxis]
            for di in range(D_te):
                words = doc_ids[di]
                cnt = doc_cnt[di]
                new_phi = np.log(self.beta[:, words] + eps) + e_log_theta[di, :][:, np.newaxis]
                new_phi = np.exp(new_phi)
                phi_te[di] = new_phi / np.sum(new_phi, 0)

                gamma_te[di, :] = np.sum(cnt * phi_te[di], 1) + self.alpha

            elbo = self._test_elbo_without_edges(doc_ids, doc_cnt, D_te, e_log_theta, phi_te, gamma_te)
            if np.fabs(elbo - elbo_old) < tol:
                break
            elbo_old = elbo

        # expected topic proportions of each test document, normalized by document length
        # so that they match the scale of self.pi used when eta and nu were estimated
        e_z_te = list()
        for di in range(D_te):
            total_word = sum(doc_cnt[di])
            I = np.diagflat(doc_cnt[di])
            e_z_te.append(np.sum(np.dot(phi_te[di], I), axis=1) / total_word)

        edge_prob_matrix = np.zeros((D_te, D_te))
        for di in range(D_te):
            for dj in range(di, D_te):
                prob = np.exp(np.inner(self.eta, e_z_te[di] * e_z_te[dj]) + self.nu)
                edge_prob_matrix[di, dj] = prob
                edge_prob_matrix[dj, di] = prob

        return gamma_te, phi_te, edge_prob_matrix
    def _test_elbo_without_edges(self, doc_ids, doc_cnt, D_te, e_log_theta, phi_te, gamma_te):
        elbo = 0
        log_beta = np.log(self.beta + eps)

        for di in range(D_te):
            words = doc_ids[di]
            cnt = doc_cnt[di]

            elbo += np.sum(cnt * (phi_te[di] * log_beta[:, words]))  # E_q[log p(w_{d,n}|\beta,z_{d,n})]
            elbo += np.sum((self.alpha - 1.) * e_log_theta[di, :])  # E_q[log p(\theta_d|\alpha)]
            elbo += np.sum(phi_te[di].T * e_log_theta[di, :])  # E_q[log p(z_{d,n}|\theta_d)]
            elbo += -gammaln(np.sum(gamma_te[di, :])) + np.sum(gammaln(gamma_te[di, :])) \
                - np.sum((gamma_te[di, :] - 1.) * e_log_theta[di, :])  # - E_q[log q(\theta|\gamma)]
            elbo += -np.sum(cnt * phi_te[di] * np.log(phi_te[di] + eps))  # - E_q[log q(z|\phi)]

        return elbo
# --- toy example: two documents joined by a single bidirectional link ---
rho = 1
num_topic = 2
num_voca = 6
num_doc = 2
doc_ids = [[0, 1, 4], [2, 3, 5]]
doc_cnt = [[2, 2, 3], [1, 3, 1]]
doc_links = [[1], [0]]  # bidirectional link

model = rtm(num_topic, num_doc, num_voca, doc_ids, doc_cnt, doc_links, rho)
model.posterior_inference(10)

a, b, c = model.predict_edges(doc_ids, doc_cnt)
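# For this toy example (exact numbers vary with the random initialization):
#   a : 2 x num_topic matrix of variational Dirichlet parameters for the two test documents
#   b : list of two num_topic x W variational topic-assignment matrices
#   c : 2 x 2 matrix of pairwise link probabilities; c[0, 1] is the predicted
#       probability of a link between document 0 and document 1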