Last active
March 7, 2018 18:08
-
-
Save nojima/95d0d813f18a990ff7ee0c1d4625f597 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
In [1]: %load_ext autoreload | |
In [2]: %autoreload 2 | |
In [3]: from stan_lda import * | |
In [4]: import os | |
In [5]: os.environ['CC'] = 'gcc-4.8' | |
In [6]: word_ids, doc_ids, vocab = read_corpus("ptb.train.txt", 100) | |
In [7]: fit = run_stan(word_ids, doc_ids, vocab, n_topics=7) | |
INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_d8759dc0e20014dd3ccabfc20650f402 NOW. | |
Iteration: 1 / 300 [ 0%] (Warmup) (Chain 2) | |
Iteration: 1 / 300 [ 0%] (Warmup) (Chain 3) | |
Iteration: 1 / 300 [ 0%] (Warmup) (Chain 0) | |
Iteration: 1 / 300 [ 0%] (Warmup) (Chain 1) | |
Iteration: 30 / 300 [ 10%] (Warmup) (Chain 1) | |
Iteration: 30 / 300 [ 10%] (Warmup) (Chain 3) | |
Iteration: 30 / 300 [ 10%] (Warmup) (Chain 0) | |
Iteration: 30 / 300 [ 10%] (Warmup) (Chain 2) | |
Iteration: 60 / 300 [ 20%] (Warmup) (Chain 1) | |
Iteration: 60 / 300 [ 20%] (Warmup) (Chain 2) | |
Iteration: 60 / 300 [ 20%] (Warmup) (Chain 0) | |
Iteration: 60 / 300 [ 20%] (Warmup) (Chain 3) | |
Iteration: 90 / 300 [ 30%] (Warmup) (Chain 2) | |
Iteration: 90 / 300 [ 30%] (Warmup) (Chain 1) | |
Iteration: 90 / 300 [ 30%] (Warmup) (Chain 0) | |
Iteration: 90 / 300 [ 30%] (Warmup) (Chain 3) | |
Iteration: 120 / 300 [ 40%] (Warmup) (Chain 1) | |
Iteration: 120 / 300 [ 40%] (Warmup) (Chain 0) | |
Iteration: 120 / 300 [ 40%] (Warmup) (Chain 2) | |
Iteration: 120 / 300 [ 40%] (Warmup) (Chain 3) | |
Iteration: 150 / 300 [ 50%] (Warmup) (Chain 1) | |
Iteration: 151 / 300 [ 50%] (Sampling) (Chain 1) | |
Iteration: 150 / 300 [ 50%] (Warmup) (Chain 0) | |
Iteration: 151 / 300 [ 50%] (Sampling) (Chain 0) | |
Iteration: 150 / 300 [ 50%] (Warmup) (Chain 2) | |
Iteration: 151 / 300 [ 50%] (Sampling) (Chain 2) | |
Iteration: 150 / 300 [ 50%] (Warmup) (Chain 3) | |
Iteration: 151 / 300 [ 50%] (Sampling) (Chain 3) | |
Iteration: 180 / 300 [ 60%] (Sampling) (Chain 0) | |
Iteration: 180 / 300 [ 60%] (Sampling) (Chain 1) | |
Iteration: 180 / 300 [ 60%] (Sampling) (Chain 3) | |
Iteration: 180 / 300 [ 60%] (Sampling) (Chain 2) | |
Iteration: 210 / 300 [ 70%] (Sampling) (Chain 0) | |
Iteration: 240 / 300 [ 80%] (Sampling) (Chain 0) | |
Iteration: 210 / 300 [ 70%] (Sampling) (Chain 1) | |
Iteration: 210 / 300 [ 70%] (Sampling) (Chain 3) | |
Iteration: 210 / 300 [ 70%] (Sampling) (Chain 2) | |
Iteration: 270 / 300 [ 90%] (Sampling) (Chain 0) | |
Iteration: 300 / 300 [100%] (Sampling) (Chain 0) | |
Iteration: 240 / 300 [ 80%] (Sampling) (Chain 3) | |
# | |
# Elapsed Time: 365.896 seconds (Warm-up) | |
# 356.369 seconds (Sampling) | |
# 722.266 seconds (Total) | |
# | |
Iteration: 240 / 300 [ 80%] (Sampling) (Chain 1) | |
Iteration: 240 / 300 [ 80%] (Sampling) (Chain 2) | |
Iteration: 270 / 300 [ 90%] (Sampling) (Chain 3) | |
Iteration: 270 / 300 [ 90%] (Sampling) (Chain 1) | |
Iteration: 270 / 300 [ 90%] (Sampling) (Chain 2) | |
Iteration: 300 / 300 [100%] (Sampling) (Chain 3) | |
# | |
# Elapsed Time: 383.557 seconds (Warm-up) | |
# 559.784 seconds (Sampling) | |
# 943.34 seconds (Total) | |
# | |
Iteration: 300 / 300 [100%] (Sampling) (Chain 1) | |
# | |
# Elapsed Time: 342.031 seconds (Warm-up) | |
# 641.288 seconds (Sampling) | |
# 983.319 seconds (Total) | |
# | |
Iteration: 300 / 300 [100%] (Sampling) (Chain 2) | |
# | |
# Elapsed Time: 377.516 seconds (Warm-up) | |
# 656.548 seconds (Sampling) | |
# 1034.06 seconds (Total) | |
# |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
phi[5,740] 5.8e-4 5.9e-5 1.5e-3 2.7e-17 4.8e-9 7.9e-6 3.2e-4 5.7e-3 600 1.0 | |
phi[6,740] 8.3e-4 8.2e-5 2.0e-3 6.8e-18 2.5e-9 5.6e-6 5.5e-4 6.8e-3 600 1.0 | |
phi[0,741] 7.3e-4 7.5e-5 1.7e-3 5.5e-16 1.1e-8 7.4e-6 4.4e-4 6.3e-3 547 1.0 | |
phi[1,741] 7.0e-4 7.5e-5 1.8e-3 1.5e-18 1.1e-8 7.1e-6 3.6e-4 6.3e-3 600 1.0 | |
phi[2,741] 6.9e-4 7.0e-5 1.6e-3 9.0e-18 1.7e-8 1.3e-5 4.7e-4 6.1e-3 497 1.0 | |
phi[3,741] 7.7e-4 8.0e-5 1.9e-3 8.6e-18 1.4e-8 1.3e-5 5.5e-4 5.7e-3 580 1.0 | |
phi[4,741] 5.8e-4 6.8e-5 1.6e-3 1.0e-15 1.5e-8 7.6e-6 3.1e-4 4.8e-3 562 1.01 | |
phi[5,741] 5.2e-4 6.0e-5 1.4e-3 6.2e-18 4.6e-9 2.5e-6 2.2e-4 4.4e-3 558 1.0 | |
phi[6,741] 7.6e-4 7.1e-5 1.7e-3 2.4e-17 1.7e-8 9.3e-6 5.1e-4 6.4e-3 600 1.0 | |
phi[0,742] 7.4e-4 8.2e-5 1.8e-3 1.9e-19 2.7e-9 4.2e-6 5.4e-4 5.9e-3 477 1.0 | |
phi[1,742] 6.5e-4 7.4e-5 1.5e-3 6.4e-20 1.3e-9 5.3e-6 4.8e-4 6.0e-3 417 1.01 | |
phi[2,742] 5.5e-4 6.3e-5 1.5e-3 1.7e-16 4.1e-9 2.1e-6 2.4e-4 4.6e-3 567 1.0 | |
phi[3,742] 6.2e-4 5.7e-5 1.4e-3 1.6e-18 6.5e-9 1.1e-5 4.9e-4 5.4e-3 600 1.0 | |
phi[4,742] 6.1e-4 6.4e-5 1.5e-3 1.2e-18 6.4e-9 6.2e-6 4.2e-4 4.8e-3 521 1.0 | |
phi[5,742] 5.9e-4 6.4e-5 1.5e-3 3.1e-16 8.2e-9 7.2e-6 3.1e-4 5.3e-3 529 1.0 | |
phi[6,742] 7.5e-4 7.6e-5 1.9e-3 3.1e-16 1.6e-8 5.7e-6 5.4e-4 6.8e-3 600 1.0 | |
phi[0,743] 6.7e-4 6.8e-5 1.7e-3 1.3e-16 2.4e-8 2.0e-5 6.3e-4 5.7e-3 600 1.0 | |
phi[1,743] 7.6e-4 7.3e-5 1.8e-3 1.1e-16 7.2e-9 1.1e-5 4.7e-4 6.3e-3 583 1.0 | |
phi[2,743] 7.0e-4 7.9e-5 1.7e-3 3.1e-17 6.4e-9 3.4e-6 4.2e-4 5.5e-3 459 1.0 | |
phi[3,743] 5.6e-4 5.9e-5 1.4e-3 6.1e-17 2.9e-8 1.2e-5 3.3e-4 4.8e-3 600 1.0 | |
phi[4,743] 6.7e-4 6.9e-5 1.7e-3 4.0e-16 7.4e-9 5.5e-6 3.5e-4 5.6e-3 600 1.0 | |
phi[5,743] 6.2e-4 6.8e-5 1.7e-3 9.7e-16 5.0e-9 6.9e-6 3.3e-4 5.3e-3 600 1.0 | |
phi[6,743] 7.6e-4 7.4e-5 1.8e-3 1.5e-16 1.8e-8 7.4e-6 5.6e-4 6.5e-3 600 1.0 | |
lp__ -2.9e4 7.58 74.65 -2.9e4 -2.9e4 -2.9e4 -2.9e4 -2.9e4 97 1.04 | |
Samples were drawn using NUTS at 2018年03月08日 02時40分57秒. | |
For each parameter, n_eff is a crude measure of effective sample size, | |
and Rhat is the potential scale reduction factor on split chains (at | |
convergence, Rhat=1). | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
In [58]: m = fit.extract() | |
In [59]: phi = m["phi"] | |
In [60]: phi.shape | |
Out[60]: (600, 7, 744) | |
In [61]: mphi = np.mean(phi, axis=0) | |
In [62]: mphi | |
Out[62]: | |
array([[ 0.01934522, 0.00067797, 0.00067912, ..., 0.00073079, | |
0.00074387, 0.00066906], | |
[ 0.05383168, 0.0005909 , 0.00057828, ..., 0.00069623, | |
0.00064964, 0.00076267], | |
[ 0.02318433, 0.00062504, 0.00082581, ..., 0.0006914 , | |
0.00054864, 0.00070076], | |
..., | |
[ 0.06520887, 0.00069524, 0.00068922, ..., 0.00058358, | |
0.00060854, 0.00067 ], | |
[ 0.12132906, 0.00059118, 0.00060603, ..., 0.00052047, | |
0.00058573, 0.0006186 ], | |
[ 0.0069193 , 0.00072506, 0.00085234, ..., 0.00076468, | |
0.00074746, 0.00076338]]) | |
In [63]: for row in mphi.argsort(axis=1): | |
...: print([vocab.word(wid+1) for wid in row[-20:]]) | |
...: | |
['on', 'with', 'year', 'billion', 'at', 'yield', 'by', 'said', 'in', "'s", 'for', 'and', '$', 'from', 'a', '<unk>', 'to', 'of', 'the', 'N'] | |
['by', 'are', 'workers', 'was', 'with', 'asbestos', 'it', 'that', 'is', 'on', "'s", 'said', 'and', 'for', 'in', 'to', 'a', 'of', '<unk>', 'the'] | |
['funds', 'average', 'year', 'said', 'by', 'at', 'yield', "'s", 'billion', 'for', 'and', 'in', '$', 'a', 'from', 'of', 'to', '<unk>', 'the', 'N'] | |
['year', 'at', 'asbestos', 'with', 'by', 'on', 'was', 'it', 'is', 'that', "'s", 'said', 'for', 'in', 'to', 'and', 'a', 'of', 'the', '<unk>'] | |
['asbestos', 'it', 'is', 'was', 'with', 'on', 'that', 'from', '$', 'said', "'s", 'for', 'in', 'and', 'to', 'a', 'N', 'of', 'the', '<unk>'] | |
['this', 'workers', 'was', 'by', 'it', 'asbestos', 'on', 'is', "'s", 'with', 'that', 'said', 'for', 'to', 'in', 'a', 'and', 'of', 'the', '<unk>'] | |
['with', 'that', 'year', 'was', 'asbestos', 'it', 'by', 'on', '<unk>', 'is', 'said', "'s", 'N', 'for', 'and', 'in', 'to', 'of', 'a', 'the'] | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment