Add BERTopic.
This commit is contained in:
@@ -0,0 +1,38 @@
|
||||
import copy
|
||||
import pytest
|
||||
from bertopic.vectorizers import OnlineCountVectorizer
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"model",
|
||||
[
|
||||
("kmeans_pca_topic_model"),
|
||||
("custom_topic_model"),
|
||||
("merged_topic_model"),
|
||||
("reduced_topic_model"),
|
||||
("online_topic_model"),
|
||||
],
|
||||
)
|
||||
def test_online_cv(model, documents, request):
|
||||
topic_model = copy.deepcopy(request.getfixturevalue(model))
|
||||
vectorizer_model = OnlineCountVectorizer(stop_words="english", ngram_range=(2, 2))
|
||||
|
||||
topics = [topic_model.get_topic(topic) for topic in set(topic_model.topics_)]
|
||||
topic_model.update_topics(documents, vectorizer_model=vectorizer_model)
|
||||
new_topics = [topic_model.get_topic(topic) for topic in set(topic_model.topics_)]
|
||||
|
||||
for old_topic, new_topic in zip(topics, new_topics):
|
||||
if old_topic[0][0] != "":
|
||||
assert old_topic != new_topic
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model", [("online_topic_model")])
|
||||
def test_clean_bow(model, request):
|
||||
topic_model = copy.deepcopy(request.getfixturevalue(model))
|
||||
|
||||
original_shape = topic_model.vectorizer_model.X_.shape
|
||||
topic_model.vectorizer_model.delete_min_df = 2
|
||||
topic_model.vectorizer_model._clean_bow()
|
||||
|
||||
assert original_shape[0] == topic_model.vectorizer_model.X_.shape[0]
|
||||
assert original_shape[1] > topic_model.vectorizer_model.X_.shape[1]
|
||||
Reference in New Issue
Block a user