# Provenance: contents of word2vec_helpers9.28.py as added by git patch
# 66fcbe82d8e941be04ac9184b5c3a2eed1903b34 (author: chenzhikuo,
# Thu, 28 Sep 2017 20:55:42 +0800), subject
# "Rename word2vec_helpers.py to word2vec_helpers9.28.py".
# Patch note (translated from Chinese): "Hello author, this function has a
# problem: in the original program the processed sentence was never stored
# in a new padding_sentences list, yet `sentences` was what got returned, so
# the processing above effectively did nothing."
def padding_sentences(input_sentences, padding_token, padding_sentence_length=None):
    """Pad or truncate every tokenized sentence to one common length.

    Args:
        input_sentences: Collection of sentences, each itself a sequence of
            tokens (a list of lists). The input is never modified.
        padding_token: Token appended to sentences shorter than the target
            length.
        padding_sentence_length: Target length. When ``None``, the length of
            the longest input sentence is used (``0`` for empty input).

    Returns:
        A ``(padded, max_sentence_length)`` tuple, where ``padded`` is a new
        list holding one new token list per input sentence and
        ``max_sentence_length`` is the target length that was applied.
    """
    # Materialize once so the input can be traversed twice (length scan and
    # padding pass) even if it is a one-shot iterable.
    sentences = list(input_sentences)

    if padding_sentence_length is not None:
        max_sentence_length = padding_sentence_length
    else:
        lengths = [len(sentence) for sentence in sentences]
        # max() raises ValueError on an empty sequence, so guard empty input.
        max_sentence_length = max(lengths) if lengths else 0

    padded = []
    for sentence in sentences:
        # Slice + copy: keeps at most the first max_sentence_length tokens and
        # guarantees the caller's sentence objects are left untouched.
        tokens = list(sentence[:max_sentence_length])
        # A non-positive repeat count yields an empty list, so sentences that
        # are already long enough are not extended.
        tokens.extend([padding_token] * (max_sentence_length - len(tokens)))
        padded.append(tokens)
    return padded, max_sentence_length