PolMine · November 11, 2019 08:58
diff --git a/gistfile1.txt b/gistfile1.txt
 # This code, which can easily be adapted, trains a word2vec model. Note that it
 # relies on the package [wordVectors](https://github.com/bmschmidt/wordVectors).


 library(wordVectors)

 file_out <- "~/Lab/tmp/germaparl.txt"
 vectors_bin <- "~/Lab/tmp/germaparl.bin"

 # Collapse the character vector `x` into one space-separated string and append
 # it as a single line to the file named by the global `file_out`.
 .fn <- function(x) {
   readr::write_lines(
     stringr::str_c(x, collapse = " "),
     file_out,
     append = TRUE
   )
 }

 # Export the corpus as plain text: one line per speech, tokens separated by
 # spaces.
 #
 # .fn() appends to `file_out`, so a file left over from an earlier run would
 # end up containing the corpus twice. Remove it first so every run starts from
 # an empty training file.
 if (file.exists(file_out)) {
   file.remove(file_out)
 }

 # Split GERMAPARL by the s-attribute "speech_id", get the "word" token stream
 # of each part and pass each one to .fn(). lapply() collects whatever .fn()
 # returns; invisible() keeps that list from being auto-printed at top level.
 # NOTE(review): corpus(), get_token_stream() and %>% are not attached by the
 # library() call in this script - presumably polmineR needs to be loaded
 # beforehand; confirm.
 invisible(
   corpus("GERMAPARL") %>%
     split(s_attribute = "speech_id") %>%
     get_token_stream(p_attribute = "word") %>%
     lapply(.fn)
 )

 # Train the model on the exported text file and store the vectors in
 # `vectors_bin`.
 train_word2vec(
   file_out,
   vectors_bin,
   vectors = 200,
   threads = 7,
   window = 12,
   iter = 5,
   negative_samples = 0
 )
	# This code, which can easily be adapted, trains a word2vec model. Note that it
	# relies on the package [wordVectors](https://github.com/bmschmidt/wordVectors).


	library(wordVectors)

	file_out <- "~/Lab/tmp/germaparl.txt"
	vectors_bin <- "~/Lab/tmp/germaparl.bin"

	# Collapse the character vector `x` into one space-separated string and append
	# it as a single line to the file named by the global `file_out`.
	.fn <- function(x) {
	  readr::write_lines(
	    stringr::str_c(x, collapse = " "),
	    file_out,
	    append = TRUE
	  )
	}

	# Export the corpus as plain text: one line per speech, tokens separated by
	# spaces.
	#
	# .fn() appends to `file_out`, so a file left over from an earlier run would
	# end up containing the corpus twice. Remove it first so every run starts from
	# an empty training file.
	if (file.exists(file_out)) {
	  file.remove(file_out)
	}

	# Split GERMAPARL by the s-attribute "speech_id", get the "word" token stream
	# of each part and pass each one to .fn(). lapply() collects whatever .fn()
	# returns; invisible() keeps that list from being auto-printed at top level.
	# NOTE(review): corpus(), get_token_stream() and %>% are not attached by the
	# library() call in this script - presumably polmineR needs to be loaded
	# beforehand; confirm.
	invisible(
	  corpus("GERMAPARL") %>%
	    split(s_attribute = "speech_id") %>%
	    get_token_stream(p_attribute = "word") %>%
	    lapply(.fn)
	)

	# Train the model on the exported text file and store the vectors in
	# `vectors_bin`.
	train_word2vec(
	  file_out,
	  vectors_bin,
	  vectors = 200,
	  threads = 7,
	  window = 12,
	  iter = 5,
	  negative_samples = 0
	)