【snownlp相关文件】上传自己的模型,调用utils/mynlp

This commit is contained in:
redhongx
2024-07-04 11:54:06 +08:00
parent f09fcb3000
commit 93b72ea2e0
33 changed files with 135956 additions and 0 deletions
+73
View File
@@ -0,0 +1,73 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import os
import codecs
from .. import normal
from .. import seg
from ..classification.bayes import Bayes
# Absolute path of the default model file, which sits in the same directory
# as this module.
data_path = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    'sentiment.marshal'
)
class Sentiment(object):
    """Sentiment scorer that wraps a ``Bayes`` classifier.

    Documents are segmented and stop-word filtered before being passed to
    the underlying classifier for training or classification.
    """

    def __init__(self):
        """Create the wrapper around a fresh ``Bayes`` classifier."""
        self.classifier = Bayes()

    def save(self, fname, iszip=True):
        """Delegate saving of the model to the underlying classifier.

        ``fname`` and ``iszip`` are forwarded unchanged to ``Bayes.save``.
        """
        self.classifier.save(fname, iszip)

    def load(self, fname=data_path, iszip=True):
        """Delegate loading of the model to the underlying classifier.

        ``fname`` defaults to the module-level ``data_path``; both arguments
        are forwarded unchanged to ``Bayes.load``.
        """
        self.classifier.load(fname, iszip)

    def handle(self, doc):
        """Return the segmented words of ``doc`` after stop-word filtering."""
        return normal.filter_stop(seg.seg(doc))

    def train(self, neg_docs, pos_docs):
        """Train the classifier on negative and positive documents.

        Every document is pre-processed with :meth:`handle` and paired with
        the label ``'neg'`` or ``'pos'``; negative samples come first in the
        training data, followed by positive samples.
        """
        neg_samples = [[self.handle(doc), 'neg'] for doc in neg_docs]
        pos_samples = [[self.handle(doc), 'pos'] for doc in pos_docs]
        self.classifier.train(neg_samples + pos_samples)

    def classify(self, sent):
        """Return a score for ``sent`` oriented towards the ``'pos'`` label.

        The underlying classifier returns a ``(label, prob)`` pair. ``prob``
        is returned as-is when the label is ``'pos'``; otherwise ``1 - prob``
        is returned.
        """
        label, prob = self.classifier.classify(self.handle(sent))
        return prob if label == 'pos' else 1 - prob
# Module-level default classifier used by the train/save/load/classify
# functions below. It is created and loaded (from the default `data_path`)
# as a side effect of importing this module.
classifier = Sentiment()
classifier.load()
def train(neg_file, pos_file):
    """Replace the module-level classifier with one trained from two files.

    Each file is read as UTF-8 text, one document per line; trailing
    carriage-return / newline characters are stripped from every line.

    :param neg_file: path of the file holding negative samples.
    :param pos_file: path of the file holding positive samples.
    :raises IOError: (``OSError`` on Python 3) if either file cannot be
        opened.
    """
    global classifier
    # Use context managers so both file handles are closed deterministically,
    # including when reading or decoding raises.
    with codecs.open(neg_file, 'r', 'utf-8') as neg:
        neg_docs = [line.rstrip("\r\n") for line in neg.readlines()]
    with codecs.open(pos_file, 'r', 'utf-8') as pos:
        pos_docs = [line.rstrip("\r\n") for line in pos.readlines()]
    classifier = Sentiment()
    classifier.train(neg_docs, pos_docs)
def save(fname, iszip=True):
    """Save the module-level classifier.

    ``fname`` and ``iszip`` are forwarded unchanged to the classifier's
    ``save`` method.
    """
    current = classifier
    current.save(fname, iszip)
def load(fname, iszip=True):
    """Load a model into the module-level classifier.

    ``fname`` and ``iszip`` are forwarded unchanged to the classifier's
    ``load`` method.
    """
    current = classifier
    current.load(fname, iszip)
def classify(sent):
    """Return the module-level classifier's result for ``sent``."""
    score = classifier.classify(sent)
    return score
File diff suppressed because it is too large Load Diff
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.