Add BERTopic.

This commit is contained in:
戒酒的李白
2025-08-12 19:01:20 +08:00
parent e2323d579c
commit c5c530775e
256 changed files with 28666 additions and 0 deletions
@@ -0,0 +1,14 @@
<svg width="387" height="56" viewBox="0 0 387 56" fill="none" xmlns="http://www.w3.org/2000/svg">
<rect width="387" height="56" fill="white"/>
<rect x="0.5" y="13.5" width="88" height="42" fill="white" stroke="black"/>
<text fill="#757474" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="10" letter-spacing="0em"><tspan x="30" y="9.96973">SBERT</tspan></text>
<text fill="#757474" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="10" letter-spacing="0em"><tspan x="150" y="9.96973">Logistic Regression</tspan></text>
<text fill="#757474" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="10" letter-spacing="0em"><tspan x="320" y="9.96973">c-TF-IDF</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="12" letter-spacing="0em"><tspan x="9" y="37.7637">Embeddings</tspan></text>
<rect x="142.5" y="13.5" width="105" height="42" fill="white" stroke="black"/>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="12" letter-spacing="0em"><tspan x="173.045" y="37.7637">Classifier</tspan></text>
<path d="M126.707 32.7071C127.098 32.3166 127.098 31.6834 126.707 31.2929L120.343 24.9289C119.953 24.5384 119.319 24.5384 118.929 24.9289C118.538 25.3195 118.538 25.9526 118.929 26.3431L124.586 32L118.929 37.6569C118.538 38.0474 118.538 38.6805 118.929 39.0711C119.319 39.4616 119.953 39.4616 120.343 39.0711L126.707 32.7071ZM99 33H126V31H99V33Z" fill="black"/>
<rect x="295.5" y="13.5" width="91" height="42" fill="white" stroke="black"/>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="12" letter-spacing="0em"><tspan x="327.279" y="31.7637">Topic&#10;</tspan><tspan x="303.215" y="45.7637">representation</tspan></text>
<path d="M285.707 32.7071C286.098 32.3166 286.098 31.6834 285.707 31.2929L279.343 24.9289C278.953 24.5384 278.319 24.5384 277.929 24.9289C277.538 25.3195 277.538 25.9526 277.929 26.3431L283.586 32L277.929 37.6569C277.538 38.0474 277.538 38.6805 277.929 39.0711C278.319 39.4616 278.953 39.4616 279.343 39.0711L285.707 32.7071ZM258 33H285V31H258V33Z" fill="black"/>
</svg>

After

Width:  |  Height:  |  Size: 2.2 KiB

@@ -0,0 +1,18 @@
<svg width="534" height="57" viewBox="0 0 534 57" fill="none" xmlns="http://www.w3.org/2000/svg">
<rect width="534" height="57" fill="white"/>
<rect x="0.5" y="14.5" width="88" height="42" fill="white" stroke="black"/>
<text fill="#757474" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="10" letter-spacing="0em"><tspan x="30" y="10.9697">SBERT</tspan></text>
<text fill="#757474" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="10" letter-spacing="0em"><tspan x="183" y="10.9697">UMAP</tspan></text>
<text fill="#757474" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="10" letter-spacing="0em"><tspan x="313" y="10.9697">HDBSCAN</tspan></text>
<text fill="#757474" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="10" letter-spacing="0em"><tspan x="468" y="10.9697">c-TF-IDF</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="12" letter-spacing="0em"><tspan x="9" y="38.7637">Embeddings</tspan></text>
<rect x="142.5" y="14.5" width="105" height="42" fill="white" stroke="black"/>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="12" letter-spacing="0em"><tspan x="156.094" y="33.7637">Dimensionality &#10;</tspan><tspan x="171.762" y="47.7637">reduction</tspan></text>
<path d="M126.707 33.7071C127.098 33.3166 127.098 32.6834 126.707 32.2929L120.343 25.9289C119.953 25.5384 119.319 25.5384 118.929 25.9289C118.538 26.3195 118.538 26.9526 118.929 27.3431L124.586 33L118.929 38.6569C118.538 39.0474 118.538 39.6805 118.929 40.0711C119.319 40.4616 119.953 40.4616 120.343 40.0711L126.707 33.7071ZM99 34H126V32H99V34Z" fill="black"/>
<rect x="295.5" y="14.5" width="91" height="42" fill="white" stroke="black"/>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="12" letter-spacing="0em"><tspan x="317" y="38.7637">Clustering</tspan></text>
<path d="M285.707 33.7071C286.098 33.3166 286.098 32.6834 285.707 32.2929L279.343 25.9289C278.953 25.5384 278.319 25.5384 277.929 25.9289C277.538 26.3195 277.538 26.9526 277.929 27.3431L283.586 33L277.929 38.6569C277.538 39.0474 277.538 39.6805 277.929 40.0711C278.319 40.4616 278.953 40.4616 279.343 40.0711L285.707 33.7071ZM258 34H285V32H258V34Z" fill="black"/>
<rect x="442.5" y="14.5" width="91" height="42" fill="white" stroke="black"/>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="12" letter-spacing="0em"><tspan x="472.404" y="30.7637">Topic &#10;</tspan><tspan x="450.215" y="44.7637">representation</tspan></text>
<path d="M426.707 33.7071C427.098 33.3166 427.098 32.6834 426.707 32.2929L420.343 25.9289C419.953 25.5384 419.319 25.5384 418.929 25.9289C418.538 26.3195 418.538 26.9526 418.929 27.3431L424.586 33L418.929 38.6569C418.538 39.0474 418.538 39.6805 418.929 40.0711C419.319 40.4616 419.953 40.4616 420.343 40.0711L426.707 33.7071ZM399 34H426V32H399V34Z" fill="black"/>
</svg>

After

Width:  |  Height:  |  Size: 3.0 KiB

@@ -0,0 +1,16 @@
<svg version="1.1" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 141.29942040537162 108.05440778228649" width="423.89826121611486" height="324.16322334685947">
<!-- svg-source:excalidraw -->
<defs>
<style class="style-fonts">
@font-face {
font-family: "Virgil";
src: url("https://excalidraw.com/Virgil.woff2");
}
@font-face {
font-family: "Cascadia";
src: url("https://excalidraw.com/Cascadia.woff2");
}
</style>
</defs>
<g stroke-linecap="round" transform="translate(10 10) rotate(0 60.64971020268581 44.027203891143245)"><path d="M-2.73 1.92 C24.06 1.09, 56.8 -3.83, 119.99 -2.47 M1.8 1.6 C39.19 0.7, 76.15 0.87, 122.58 -0.36 M123.19 3.98 C118.93 26.12, 126.15 49.95, 118.13 84.42 M120.17 0 C120.71 33.24, 123.86 64.82, 123.23 86.14 M123.15 86.29 C93.9 84.44, 73.27 85.17, -2.56 87.54 M119.47 89.23 C82.71 86.5, 39.34 85.91, 1.55 89.48 M-1.53 87.36 C-2.82 58.23, -1.26 26.16, 1.97 0.48 M-0.96 87.6 C3.28 54.37, 0.14 24.21, 1.59 -0.59" stroke="#000000" stroke-width="1" fill="none"></path></g></svg>

After

Width:  |  Height:  |  Size: 1.0 KiB

@@ -0,0 +1,120 @@
Although topic modeling is typically done by discovering topics in an unsupervised manner, there might be times when you already have a bunch of clusters or classes from which you want to model the topics. For example, the often-used [20 NewsGroups dataset](https://scikit-learn.org/0.19/datasets/twenty_newsgroups.html) is already split up into 20 classes. Similarly, you might already have created some labels yourself through packages like [human-learn](https://github.com/koaning/human-learn), [bulk](https://github.com/koaning/bulk), [thisnotthat](https://github.com/TutteInstitute/thisnotthat) or something entirely different.
Instead of using BERTopic to discover previously unknown topics, we are now going to manually pass them to BERTopic and try to learn the relationship between those topics and the input documents.
> In other words, we are going to be performing classification instead!
We can view this as a supervised topic modeling approach: rather than using a clustering algorithm, we are going to use a classification algorithm.
Generally, we have the following pipeline:
<br>
<div class="svg_image">
--8<-- "docs/getting_started/supervised/default_pipeline.svg"
</div>
<br>
Instead, we are now going to skip over the dimensionality reduction step and replace the clustering step with a classification model:
<br>
<div class="svg_image">
--8<-- "docs/getting_started/supervised/classification_pipeline.svg"
</div>
<br>
In other words, we can pass our labels to BERTopic and it will not only learn how to predict labels for new instances, but it will also transform those labels into topics by calculating c-TF-IDF representations over the set of documents within each label. This process allows us to model the topics themselves and similarly gives us the option to use everything BERTopic has to offer.
To do so, we need to skip over the dimensionality reduction step and replace the clustering step with a classification algorithm. We can use the documents and labels from the 20 NewsGroups dataset to create topics from those 20 labels:
```python
from sklearn.datasets import fetch_20newsgroups
# Get labeled data
data = fetch_20newsgroups(subset='all', remove=('headers', 'footers', 'quotes'))
docs = data['data']
y = data['target']
```
Then, we create an empty instance of the dimensionality reduction step and a classifier that takes the place of the clustering step. We pass those to BERTopic so that it skips dimensionality reduction, assigns topics with the classifier, and goes on to the topic representation process:
```python
from bertopic import BERTopic
from bertopic.vectorizers import ClassTfidfTransformer
from bertopic.dimensionality import BaseDimensionalityReduction
from sklearn.linear_model import LogisticRegression
# Get labeled data
data = fetch_20newsgroups(subset='all', remove=('headers', 'footers', 'quotes'))
docs = data['data']
y = data['target']
# Skip over dimensionality reduction, replace cluster model with classifier,
# and reduce frequent words while we are at it.
empty_dimensionality_model = BaseDimensionalityReduction()
clf = LogisticRegression()
ctfidf_model = ClassTfidfTransformer(reduce_frequent_words=True)
# Create a fully supervised BERTopic instance
topic_model= BERTopic(
umap_model=empty_dimensionality_model,
hdbscan_model=clf,
ctfidf_model=ctfidf_model
)
topics, probs = topic_model.fit_transform(docs, y=y)
```
Let's take a look at a few topics that we get out of training this way by running `topic_model.get_topic_info()`:
<br>
<div class="svg_image">
--8<-- "docs/getting_started/supervised/table.svg"
</div>
<br>
We can see several interesting topics appearing here. They seem to relate to the 20 classes we had as input. Now, let's map those topics to our original classes to view their relationship:
```python
# Map input `y` to topics
mappings = topic_model.topic_mapper_.get_mappings()
mappings = {value: data["target_names"][key] for key, value in mappings.items()}
# Assign original classes to our topics
df = topic_model.get_topic_info()
df["Class"] = df.Topic.map(mappings)
df
```
<div class="svg_image">
--8<-- "docs/getting_started/supervised/table_classes.svg"
</div>
<br>
We can see that the c-TF-IDF representations extract the words that give a good representation of our input classes. This is all done directly from the labeling. A welcome side-effect is that we now have a classification algorithm that allows us to predict the topics of unseen data:
```python
>>> topic, _ = topic_model.transform("this is a document about cars")
>>> topic_model.get_topic(topic[0])
[('car', 0.4407600315538472),
('cars', 0.32348015696446325),
('engine', 0.28032518444946686),
('ford', 0.2500224508115155),
('oil', 0.2325984913598611),
('dealer', 0.2310723968585826),
('my', 0.22045777551991935),
('it', 0.21327993649430219),
('tires', 0.20420842634292657),
('brake', 0.20246902481367085)]
```
Moreover, we can still use BERTopic-specific features like dynamic topic modeling, topics per class, hierarchical topic modeling, modeling topic distributions, etc.
!!! note
    The resulting `topics` may be a different mapping from the `y` labels. To map `y` to `topics`, we can run the following:

    ```python
    mappings = topic_model.topic_mapper_.get_mappings()
    y_mapped = [mappings[val] for val in y]
    ```
@@ -0,0 +1,52 @@
<svg width="387" height="347" viewBox="0 0 387 347" fill="none" xmlns="http://www.w3.org/2000/svg">
<rect width="387" height="34" fill="white"/>
<line y1="35" x2="387" y2="35" stroke="#BDBDBD" stroke-width="2"/>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" font-weight="bold" letter-spacing="0em"><tspan x="31" y="24.0576">Topic</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" font-weight="bold" letter-spacing="0em"><tspan x="82" y="24.0576">Count&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" font-weight="bold" letter-spacing="0em"><tspan x="338" y="23.0576">Name</tspan></text>
<rect y="36" width="387" height="34" fill="white"/>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" font-weight="bold" letter-spacing="0em"><tspan x="7" y="58.0576">0</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="57.3574" y="58.0576">0</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="104.072" y="59.0576">999&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="215.92" y="59.0576">0_game_hockey_team_25&#10;</tspan></text>
<rect y="70" width="387" height="34" fill="#F5F5F5"/>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="210.232" y="92.0576">1_god_church_jesus_christ&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="104.072" y="92.0576">997&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="57.3574" y="92.0576">1</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" font-weight="bold" letter-spacing="0em"><tspan x="7" y="92.0576">1</tspan></text>
<rect y="104" width="387" height="34" fill="white"/>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" font-weight="bold" letter-spacing="0em"><tspan x="7" y="126.058">2</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="57.3574" y="126.058">2</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="104.072" y="127.058">996&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="236.551" y="127.058">2_bike_dod_ride_bikes&#10;</tspan></text>
<rect y="138" width="387" height="34" fill="#F5F5F5"/>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="213.445" y="160.058">3_baseball_game_he_year&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="104.072" y="160.058">994&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="57.3574" y="160.058">3</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" font-weight="bold" letter-spacing="0em"><tspan x="7" y="160.058">3</tspan></text>
<rect y="172" width="387" height="34" fill="white"/>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" font-weight="bold" letter-spacing="0em"><tspan x="7" y="194.058">4</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="57.3574" y="194.058">4</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="104.072" y="195.058">991&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="197.545" y="195.058">4_key_encryption_db_clipper&#10;</tspan></text>
<rect y="206" width="387" height="34" fill="#F5F5F5"/>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="229.674" y="228.058">5_car_cars_engine_ford&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="104.072" y="228.058">990</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="57.3574" y="228.058">5</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" font-weight="bold" letter-spacing="0em"><tspan x="7" y="228.058">5</tspan></text>
<rect y="240" width="387" height="34" fill="white"/>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" font-weight="bold" letter-spacing="0em"><tspan x="7" y="262.058">6</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="57.3574" y="262.058">6</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="104.072" y="263.058">990</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="157.568" y="263.058">6_medical_patients_cancer_disease&#10;</tspan></text>
<rect y="274" width="387" height="34" fill="#F5F5F5"/>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="181.781" y="296.058">7_window_server_widget_motif&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="104.072" y="296.058">988&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="57.3574" y="296.058">7</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" font-weight="bold" letter-spacing="0em"><tspan x="7" y="296.058">7</tspan></text>
<rect y="308" width="387" height="34" fill="white"/>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" font-weight="bold" letter-spacing="0em"><tspan x="7" y="330.058">8</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="57.3574" y="330.058">8</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="104.072" y="331.058">988&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="207.129" y="331.058">8_space_launch_nasa_orbit&#10;</tspan></text>
</svg>

After

Width:  |  Height:  |  Size: 7.6 KiB

@@ -0,0 +1,62 @@
<svg width="550" height="347" viewBox="0 0 550 347" fill="none" xmlns="http://www.w3.org/2000/svg">
<rect width="550" height="34" fill="white"/>
<line y1="35" x2="547" y2="35" stroke="#BDBDBD" stroke-width="2"/>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" font-weight="bold" letter-spacing="0em"><tspan x="31" y="24.0576">Topic</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" font-weight="bold" letter-spacing="0em"><tspan x="82" y="24.0576">Count&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" font-weight="bold" letter-spacing="0em"><tspan x="338" y="23.0576">Name</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" font-weight="bold" letter-spacing="0em"><tspan x="498" y="23.0576">Class</tspan></text>
<rect y="36" width="550" height="34" fill="white"/>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" font-weight="bold" letter-spacing="0em"><tspan x="7" y="58.0576">0</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="57.3574" y="58.0576">0</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="104.072" y="59.0576">999&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="215.92" y="59.0576">0_game_hockey_team_25&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="436.131" y="60.0576">rec.sport.hockey&#10;</tspan></text>
<rect y="70" width="550" height="34" fill="#F5F5F5"/>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="210.232" y="92.0576">1_god_church_jesus_christ&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="104.072" y="92.0576">997&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="57.3574" y="92.0576">1</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" font-weight="bold" letter-spacing="0em"><tspan x="7" y="92.0576">1</tspan></text>
<rect y="104" width="550" height="34" fill="white"/>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" font-weight="bold" letter-spacing="0em"><tspan x="7" y="126.058">2</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="57.3574" y="126.058">2</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="104.072" y="127.058">996&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="236.551" y="127.058">2_bike_dod_ride_bikes&#10;</tspan></text>
<rect y="138" width="550" height="34" fill="#F5F5F5"/>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="213.445" y="160.058">3_baseball_game_he_year&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="104.072" y="160.058">994&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="57.3574" y="160.058">3</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" font-weight="bold" letter-spacing="0em"><tspan x="7" y="160.058">3</tspan></text>
<rect y="172" width="550" height="34" fill="white"/>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" font-weight="bold" letter-spacing="0em"><tspan x="7" y="194.058">4</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="57.3574" y="194.058">4</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="104.072" y="195.058">991&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="197.545" y="195.058">4_key_encryption_db_clipper&#10;</tspan></text>
<rect y="206" width="550" height="34" fill="#F5F5F5"/>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="229.674" y="228.058">5_car_cars_engine_ford&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="104.072" y="228.058">990</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="57.3574" y="228.058">5</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" font-weight="bold" letter-spacing="0em"><tspan x="7" y="228.058">5</tspan></text>
<rect y="240" width="550" height="34" fill="white"/>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" font-weight="bold" letter-spacing="0em"><tspan x="7" y="262.058">6</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="57.3574" y="262.058">6</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="104.072" y="263.058">990</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="157.568" y="263.058">6_medical_patients_cancer_disease&#10;</tspan></text>
<rect y="274" width="550" height="34" fill="#F5F5F5"/>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="181.781" y="296.058">7_window_server_widget_motif&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="104.072" y="296.058">988&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="57.3574" y="296.058">7</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" font-weight="bold" letter-spacing="0em"><tspan x="7" y="296.058">7</tspan></text>
<rect y="308" width="550" height="34" fill="white"/>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" font-weight="bold" letter-spacing="0em"><tspan x="7" y="330.058">8</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="57.3574" y="330.058">8</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="104.072" y="331.058">988&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="207.129" y="331.058">8_space_launch_nasa_orbit&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="482.67" y="331.058">sci.space&#10;</tspan><tspan x="538" y="348.058">&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="435.652" y="297.058">comp.windows.x&#10;</tspan><tspan x="538" y="314.058">&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="490.982" y="264.058">sci.med&#10;</tspan><tspan x="538" y="281.058">&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="481.18" y="229.058">rec.autos&#10;</tspan><tspan x="538" y="246.058">&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="486.936" y="195.058">sci.crypt&#10;</tspan><tspan x="538" y="212.058">&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="429.117" y="160.058">rec.sport.baseball&#10;</tspan><tspan x="538" y="177.058">&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="441.463" y="127.058">rec.motorcycles&#10;</tspan></text>
<text fill="black" xml:space="preserve" style="white-space: pre" font-family="Tahoma" font-size="14" letter-spacing="0em"><tspan x="412.232" y="92.0576">soc.religion.christian&#10;</tspan></text>
</svg>

After

Width:  |  Height:  |  Size: 9.7 KiB