使用NLTK庫簡化文本的方法包括分詞、去除停用詞、詞形還原和詞頻統計:
# Text-simplification walkthrough with NLTK:
# tokenize -> drop stop words -> lemmatize -> count frequencies.
# Every stage prints its intermediate result.
# NOTE(review): NLTK's tokenizer models, stop-word lists and WordNet data
# usually have to be fetched once with nltk.download(...) — confirm which
# resources your NLTK version needs before running.
from nltk import FreqDist
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

# Stage 1: split the sample sentence into tokens.
text = "This is a sample sentence."
tokens = word_tokenize(text)
print(tokens)

# Stage 2: keep only tokens whose lowercased form is not in the English
# stop-word set (a set is used so each membership test is a hash lookup).
stop_words = set(stopwords.words('english'))
filtered_tokens = [
    token for token in tokens if token.lower() not in stop_words
]
print(filtered_tokens)

# Stage 3: lemmatize every remaining token with the WordNet lemmatizer,
# using its default arguments.
lemmatizer = WordNetLemmatizer()
lemmatized_tokens = list(map(lemmatizer.lemmatize, filtered_tokens))
print(lemmatized_tokens)

# Stage 4: build a frequency distribution over the lemmatized tokens and
# show the five most common entries.
freq_dist = FreqDist(lemmatized_tokens)
print(freq_dist.most_common(5))
通過以上方法,可以使用NLTK庫簡化文本,並進行文本處理與分析。