# Print the first three samples from `result`, one per line.
for line in result[:3]:
    print(line)

['here', 'are', 'two', 'reasons', 'companies', 'fail', 'they', 'only', 'do', 'more', 'of', 'the', 'same', 'or', 'they', 'only', 'do', 'what', 's', 'new'] ['to', 'me', 'the', 'real', 'real', 'solution', 'to', 'quality', 'growth', 'is', 'figuring', 'out', 'the', 'balance', 'between', 'two', 'activities', 'exploration', 'and', 'exploitation'] ['both', 'are', 'necessary', 'but', 'it', 'can', 'be', 'too', 'much', 'of', 'a', 'good', 'thing']

Word2Vec 훈련시키기

vector_size= 워드 벡터의 특징 값. 즉, 임베딩 된 벡터의 차원.

window= 컨텍스트 윈도우 크기

min_count= 단어 최소 빈도 수 제한 (빈도가 적은 단어들은 학습하지 않는다.)

workers= 학습에 사용할 워커 스레드 수

sg= 0은 CBOW, 1은 Skip-gram.

CBOW

from gensim.models import Word2Vec
from gensim.models import KeyedVectors

# Train a CBOW model (sg=0) on the tokenized sentences in `result`.
model = Word2Vec(
    sentences=result,
    vector_size=100,
    window=5,
    min_count=5,
    workers=4,
    sg=0,
)

# Save the trained word vectors in word2vec format ...
model.wv.save_word2vec_format('eng_w2v')
# ... and load them back as standalone KeyedVectors.
loaded_model = KeyedVectors.load_word2vec_format("eng_w2v")

# Query the words most similar to "real" and print each with its score.
model_result = loaded_model.most_similar("real")
for word, similarity in model_result:
    print(f"{word}: {similarity}")
    

tremendous: 0.45040902495384216 genuine: 0.4448837637901306 physical: 0.4398976266384125 challenge: 0.4395090639591217 changing: 0.43913084268569946 perfect: 0.4350649416446686 complete: 0.430348664522171 worthy: 0.42950013279914856 moral: 0.42892104387283325 personal: 0.4272252321243286

Skip-gram

from gensim.models import Word2Vec
from gensim.models import KeyedVectors

# Train a Skip-gram model (sg=1) on the tokenized sentences in `result`.
model = Word2Vec(
    sentences=result,
    vector_size=100,
    window=5,
    min_count=5,
    workers=4,
    sg=1,
)

# Save the trained word vectors in word2vec format ...
model.wv.save_word2vec_format('eng_w2v_sg_1')
# ... and load them back as standalone KeyedVectors.
loaded_model_sg_1 = KeyedVectors.load_word2vec_format("eng_w2v_sg_1")

# Query the words most similar to "real" and print each with its score.
model_result = loaded_model_sg_1.most_similar("real")
for word, similarity in model_result:
    print(f"{word}: {similarity}")
    

tangible: 0.6158655881881714 estate: 0.6035436987876892 genuine: 0.6017087697982788 overload: 0.6008161902427673 boggling: 0.599394679069519 distorted: 0.5986577272415161 virtual: 0.59760981798172 deterrent: 0.5939303636550903 palpable: 0.5927425622940063 transformational: 0.5911173224449158