Adjust demo paths so they work regardless of the current working directory. --- demo-analogy.sh.orig 2014-09-07 01:54:27.000000000 +0900 +++ demo-analogy.sh 2014-12-24 22:55:24.000000000 +0900 @@ -7,5 +7,5 @@ echo Note that for the word analogy to perform well, the model should be trained on much larger data set echo Example input: paris france berlin echo --------------------------------------------------------------------------------------------------- -time ./word2vec -train text8 -output vectors.bin -cbow 1 -size 200 -window 8 -negative 25 -hs 0 -sample 1e-4 -threads 20 -binary 1 -iter 15 -./word-analogy vectors.bin +time @EXECDIR@/word2vec -train text8 -output vectors.bin -cbow 1 -size 200 -window 8 -negative 25 -hs 0 -sample 1e-4 -threads 20 -binary 1 -iter 15 +@EXECDIR@/word-analogy vectors.bin --- demo-classes.sh.orig 2014-09-07 01:54:27.000000000 +0900 +++ demo-classes.sh 2014-12-24 22:57:00.000000000 +0900 @@ -3,6 +3,6 @@ wget http://mattmahoney.net/dc/text8.zip -O text8.gz gzip -d text8.gz -f fi -time ./word2vec -train text8 -output classes.txt -cbow 1 -size 200 -window 8 -negative 25 -hs 0 -sample 1e-4 -threads 20 -iter 15 -classes 500 +time @EXECDIR@/word2vec -train text8 -output classes.txt -cbow 1 -size 200 -window 8 -negative 25 -hs 0 -sample 1e-4 -threads 20 -iter 15 -classes 500 sort classes.txt -k 2 -n > classes.sorted.txt echo The word classes were saved to file classes.sorted.txt --- demo-phrase-accuracy.sh.orig 2014-09-07 01:54:27.000000000 +0900 +++ demo-phrase-accuracy.sh 2014-12-24 22:57:51.000000000 +0900 @@ -4,8 +4,8 @@ gzip -d news.2012.en.shuffled.gz -f fi sed -e "s/’/'/g" -e "s/′/'/g" -e "s/''/ /g" < news.2012.en.shuffled | tr -c "A-Za-z'_ \n" " " > news.2012.en.shuffled-norm0 -time ./word2phrase -train news.2012.en.shuffled-norm0 -output news.2012.en.shuffled-norm0-phrase0 -threshold 200 -debug 2 -time ./word2phrase -train news.2012.en.shuffled-norm0-phrase0 -output news.2012.en.shuffled-norm0-phrase1 -threshold 100 -debug 2 +time @EXECDIR@/word2phrase -train news.2012.en.shuffled-norm0 -output news.2012.en.shuffled-norm0-phrase0 -threshold 200 -debug 2 +time @EXECDIR@/word2phrase -train news.2012.en.shuffled-norm0-phrase0 -output news.2012.en.shuffled-norm0-phrase1 -threshold 100 -debug 2 tr A-Z a-z < news.2012.en.shuffled-norm0-phrase1 > news.2012.en.shuffled-norm1-phrase1 -time ./word2vec -train news.2012.en.shuffled-norm1-phrase1 -output vectors-phrase.bin -cbow 1 -size 200 -window 10 -negative 25 -hs 0 -sample 1e-5 -threads 20 -binary 1 -iter 15 -./compute-accuracy vectors-phrase.bin < questions-phrases.txt +time @EXECDIR@/word2vec -train news.2012.en.shuffled-norm1-phrase1 -output vectors-phrase.bin -cbow 1 -size 200 -window 10 -negative 25 -hs 0 -sample 1e-5 -threads 20 -binary 1 -iter 15 +@EXECDIR@/compute-accuracy vectors-phrase.bin < questions-phrases.txt --- demo-phrases.sh.orig 2014-09-07 01:54:27.000000000 +0900 +++ demo-phrases.sh 2014-12-24 22:58:20.000000000 +0900 @@ -4,8 +4,8 @@ gzip -d news.2012.en.shuffled.gz -f fi sed -e "s/’/'/g" -e "s/′/'/g" -e "s/''/ /g" < news.2012.en.shuffled | tr -c "A-Za-z'_ \n" " " > news.2012.en.shuffled-norm0 -time ./word2phrase -train news.2012.en.shuffled-norm0 -output news.2012.en.shuffled-norm0-phrase0 -threshold 200 -debug 2 -time ./word2phrase -train news.2012.en.shuffled-norm0-phrase0 -output news.2012.en.shuffled-norm0-phrase1 -threshold 100 -debug 2 +time @EXECDIR@/word2phrase -train news.2012.en.shuffled-norm0 -output news.2012.en.shuffled-norm0-phrase0 -threshold 200 -debug 2 +time @EXECDIR@/word2phrase -train news.2012.en.shuffled-norm0-phrase0 -output news.2012.en.shuffled-norm0-phrase1 -threshold 100 -debug 2 tr A-Z a-z < news.2012.en.shuffled-norm0-phrase1 > news.2012.en.shuffled-norm1-phrase1 -time ./word2vec -train news.2012.en.shuffled-norm1-phrase1 -output vectors-phrase.bin -cbow 1 -size 200 -window 10 -negative 25 -hs 0 -sample 1e-5 -threads 20 -binary 1 -iter 15 -./distance vectors-phrase.bin +time @EXECDIR@/word2vec -train news.2012.en.shuffled-norm1-phrase1 -output vectors-phrase.bin -cbow 1 -size 200 -window 10 -negative 25 -hs 0 -sample 1e-5 -threads 20 -binary 1 -iter 15 +@EXECDIR@/distance vectors-phrase.bin --- demo-word-accuracy.sh.orig 2014-09-07 01:54:27.000000000 +0900 +++ demo-word-accuracy.sh 2014-12-24 22:58:49.000000000 +0900 @@ -3,6 +3,6 @@ wget http://mattmahoney.net/dc/text8.zip -O text8.gz gzip -d text8.gz -f fi -time ./word2vec -train text8 -output vectors.bin -cbow 1 -size 200 -window 8 -negative 25 -hs 0 -sample 1e-4 -threads 20 -binary 1 -iter 15 -./compute-accuracy vectors.bin 30000 < questions-words.txt -# to compute accuracy with the full vocabulary, use: ./compute-accuracy vectors.bin < questions-words.txt +time @EXECDIR@/word2vec -train text8 -output vectors.bin -cbow 1 -size 200 -window 8 -negative 25 -hs 0 -sample 1e-4 -threads 20 -binary 1 -iter 15 +@EXECDIR@/compute-accuracy vectors.bin 30000 < questions-words.txt +# to compute accuracy with the full vocabulary, use: @EXECDIR@/compute-accuracy vectors.bin < questions-words.txt --- demo-word.sh.orig 2014-09-07 01:54:27.000000000 +0900 +++ demo-word.sh 2014-12-24 22:59:00.000000000 +0900 @@ -3,5 +3,5 @@ wget http://mattmahoney.net/dc/text8.zip -O text8.gz gzip -d text8.gz -f fi -time ./word2vec -train text8 -output vectors.bin -cbow 1 -size 200 -window 8 -negative 25 -hs 0 -sample 1e-4 -threads 20 -binary 1 -iter 15 -./distance vectors.bin +time @EXECDIR@/word2vec -train text8 -output vectors.bin -cbow 1 -size 200 -window 8 -negative 25 -hs 0 -sample 1e-4 -threads 20 -binary 1 -iter 15 +@EXECDIR@/distance vectors.bin