【snownlp相关文件】上传自己的模型,调用utils/mynlp

This commit is contained in:
redhongx
2024-07-04 11:54:06 +08:00
parent f09fcb3000
commit 93b72ea2e0
33 changed files with 135956 additions and 0 deletions
File diff suppressed because one or more lines are too long
+43
View File
@@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import os
import codecs
from ..utils.tnt import TnT
# Absolute path of the 'tag.marshal' file that sits in the same directory
# as this module, so the lookup does not depend on the working directory.
data_path = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    'tag.marshal',
)

# Module-level tagger shared by every function below; it is populated from
# the bundled file at import time.
tagger = TnT()
tagger.load(data_path)
def train(fname):
    """Train a new tagger from a corpus file and install it module-wide.

    The file is read as UTF-8. Blank lines are skipped; every other line is
    treated as one sentence of whitespace-separated tokens, and each token
    is split on '/' (presumably ``word/tag`` pairs -- confirm against the
    format ``TnT.train`` expects).

    Args:
        fname: Path of the training corpus.

    Side effects:
        Rebinds the module-level ``tagger`` to a freshly created ``TnT``
        instance trained on the parsed data.
    """
    global tagger

    data = []
    # The context manager guarantees the file is closed even when decoding
    # or parsing raises part-way through.
    with codecs.open(fname, 'r', 'utf-8') as fr:
        for raw in fr:
            line = raw.strip()
            if not line:
                continue
            # Build a real list per sentence: on Python 3 ``map`` returns a
            # one-shot iterator that is empty after a single pass.
            data.append([token.split('/') for token in line.split()])

    tagger = TnT()
    tagger.train(data)
def save(fname, iszip=True):
    """Save the current module-level tagger to ``fname``.

    Both arguments are forwarded unchanged to ``tagger.save``.

    Args:
        fname: Destination path handed to the tagger.
        iszip: Flag handed to the tagger (presumably toggles compression --
            confirm against ``TnT.save``).
    """
    model = tagger
    model.save(fname, iszip)
def load(fname, iszip=True):
    """Load ``fname`` into the current module-level tagger.

    Both arguments are forwarded unchanged to ``tagger.load``.

    Args:
        fname: Source path handed to the tagger.
        iszip: Flag handed to the tagger (presumably indicates a compressed
            file -- confirm against ``TnT.load``).
    """
    model = tagger
    model.load(fname, iszip)
def tag_all(words):
    """Run the module-level tagger over ``words``.

    Args:
        words: Input passed straight to ``tagger.tag``.

    Returns:
        Whatever ``tagger.tag`` returns; ``tag`` below indexes each item of
        that result with ``[1]``.
    """
    tagged = tagger.tag(words)
    return tagged
def tag(words):
    """Return the second element of every item produced by ``tag_all``.

    Args:
        words: Input passed straight to ``tag_all``.

    Returns:
        The result of ``map`` over the tagged items: a list on Python 2 and
        a lazy ``map`` iterator on Python 3.
    """
    def _second(item):
        return item[1]

    tagged = tag_all(words)
    return map(_second, tagged)
Binary file not shown.
Binary file not shown.