import nltk
from nltk.tokenize import word_tokenize
# Tokenization: split a sentence into word and punctuation tokens.
# NOTE: word_tokenize needs NLTK's Punkt tokenizer data; if it is missing,
# download it once with nltk.download('punkt') ('punkt_tab' on recent NLTK).
text = "Hello, how are you today?"
tokens = word_tokenize(text)
print(tokens)  # ['Hello', ',', 'how', 'are', 'you', 'today', '?']

# Required by the stemming example that follows. Kept on its own line so it
# actually executes instead of being swallowed by the comment above.
from nltk.stem import PorterStemmer
# Stemming: reduce each word with the Porter stemmer. The expected output
# below shows the irregular form 'ran' coming back unchanged.
ps = PorterStemmer()
words = ['running', 'runs', 'ran']
stems = [ps.stem(word) for word in words]
print(stems)  # ['run', 'run', 'ran']

# Required by the lemmatization example that follows. Kept on its own line so
# it actually executes instead of being swallowed by the comment above.
from nltk.stem import WordNetLemmatizer
# Lemmatization: map a word to its dictionary form. pos='v' asks WordNet to
# treat the word as a verb (the lemmatizer's default part of speech is noun).
# NOTE: needs the WordNet corpus; fetch it once with nltk.download('wordnet').
lemmatizer = WordNetLemmatizer()
word = 'running'
lemma = lemmatizer.lemmatize(word, pos='v')
print(lemma)  # run

# Required by the TF-IDF example that follows. Kept on its own line so it
# actually executes instead of being swallowed by the comment above.
from sklearn.feature_extraction.text import TfidfVectorizer
# TF-IDF: convert a small corpus into a document-term weight matrix.
corpus = [
    'This is the first document.',
    'This document is the second document.',
    'And this is the third one.',
]
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(corpus)  # learn the vocabulary and weight each document
print(X.toarray())  # convert to a dense array for display

# Required by the Word2Vec example that follows. It must be a separate
# statement: fused onto the print() call it is a syntax error.
from gensim.models import Word2Vec
# Word2Vec: train word embeddings on a toy, already-tokenized corpus.
sentences = [['hello', 'world'], ['how', 'are', 'you']]
# min_count=1 keeps words that occur only once, which is every word here.
model = Word2Vec(sentences, min_count=1)
# Look up the learned vector for a single word.
vector = model.wv['hello']
print(vector)

# Required by the sentiment example that follows. It must be a separate
# statement: fused onto the print() call it is a syntax error.
from textblob import TextBlob
# Sentiment analysis: classify a sentence by the sign of its TextBlob polarity.
text = "I love this movie!"
blob = TextBlob(text)
sentiment = blob.sentiment.polarity
# The branch bodies must be indented; at column 0 the file does not parse.
if sentiment > 0:
    print("正面情感")  # "positive sentiment"
elif sentiment < 0:
    print("负面情感")  # "negative sentiment"
else:
    print("中性情感")  # "neutral sentiment"

# Required by the classification example that follows. It must be a separate
# statement: fused onto the print() call it is a syntax error.
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
# Build the pipeline: TF-IDF features feeding a multinomial Naive Bayes
# classifier.
# NOTE: this rebinds `model`, replacing the Word2Vec model created earlier.
model = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('clf', MultinomialNB()),
])
# Train the model.
# NOTE(review): X_train, y_train and X_test are not defined in the visible
# part of this file; presumably raw text documents and their labels, since
# the pipeline starts with a vectorizer -- confirm and define them before
# running this section.
model.fit(X_train, y_train)
# Predict labels for the test documents.
y_pred = model.predict(X_test)

# Required by the named-entity example that follows. It must be a separate
# statement: fused onto the line above it is a syntax error.
import spacy
# Named-entity recognition with spaCy's small English pipeline.
# NOTE: the pipeline package must be installed first, e.g.
#   python -m spacy download en_core_web_sm
nlp = spacy.load('en_core_web_sm')
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")
# Print every detected entity with its label; the loop body must be indented.
for ent in doc.ents:
    print(f"{ent.text}: {ent.label_}")

# Required by the translation example that follows. It must be a separate
# statement: fused onto the print() call it is a syntax error.
from googletrans import Translator
# Machine translation: translate an English word into Simplified Chinese
# through the googletrans client.
translator = Translator()
# NOTE(review): this assumes translate() returns its result synchronously;
# recent googletrans releases reportedly expose it as a coroutine that must
# be awaited -- confirm against the installed version.
result = translator.translate('Hello', src='en', dest='zh-cn')
print(result.text) # 你好