【snownlp相关文件】上传自己的模型,调用utils/mynlp
This commit is contained in:
@@ -0,0 +1,85 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from . import normal
|
||||
from . import seg
|
||||
from . import tag
|
||||
from . import sentiment
|
||||
from .sim import bm25
|
||||
from .summary import textrank
|
||||
from .summary import words_merge
|
||||
|
||||
|
||||
class SnowNLP(object):
    """Convenience facade that bundles the package's text-processing helpers.

    An instance wraps a single ``doc`` and exposes the results of the
    sibling modules (``seg``, ``tag``, ``normal``, ``sentiment``, ``bm25``,
    ``textrank``, ``words_merge``) as properties and methods.  Apart from
    the BM25 index built in ``__init__``, nothing is cached: each property
    access calls the underlying helper again.
    """

    def __init__(self, doc):
        """Store ``doc`` and build a BM25 index from it.

        :param doc: the document to analyse.  It is handed unchanged to
            ``bm25.BM25`` and to the other helpers.
            NOTE(review): the text-oriented properties presumably expect a
            string while the BM25-backed members expect a tokenised
            corpus -- confirm against callers.
        """
        self.doc = doc
        self.bm25 = bm25.BM25(doc)

    @property
    def words(self):
        """Return ``seg.seg(self.doc)``, i.e. the segmented document."""
        return seg.seg(self.doc)

    @property
    def sentences(self):
        """Return ``normal.get_sentences(self.doc)``."""
        return normal.get_sentences(self.doc)

    @property
    def han(self):
        """Return ``normal.zh2hans(self.doc)``.

        Presumably a traditional-to-simplified Chinese conversion; verify
        against ``normal.zh2hans``.
        """
        return normal.zh2hans(self.doc)

    @property
    def pinyin(self):
        """Return ``normal.get_pinyin(self.doc)``."""
        return normal.get_pinyin(self.doc)

    @property
    def sentiments(self):
        """Return ``sentiment.classify(self.doc)``."""
        return sentiment.classify(self.doc)

    @property
    def tags(self):
        """Pair every segmented word with the tag ``tag.tag`` assigns to it.

        The result is whatever ``zip`` returns on the running interpreter:
        a list on Python 2, a one-shot iterator on Python 3.
        """
        tokens = self.words
        return zip(tokens, tag.tag(tokens))

    @property
    def tf(self):
        """Return the ``f`` attribute of the BM25 index built in ``__init__``."""
        return self.bm25.f

    @property
    def idf(self):
        """Return the ``idf`` attribute of the BM25 index built in ``__init__``."""
        return self.bm25.idf

    def sim(self, doc):
        """Return ``self.bm25.simall(doc)`` for the given query ``doc``."""
        return self.bm25.simall(doc)

    def _sentence_tokens(self, sents):
        """Segment each sentence and drop stop words; one word list per sentence."""
        return [normal.filter_stop(seg.seg(sent)) for sent in sents]

    def summary(self, limit=5):
        """Return the sentences TextRank selects as most representative.

        :param limit: passed to ``TextRank.top_index``; presumably the
            maximum number of sentences returned.
        :returns: list of sentences taken from ``self.sentences``, in the
            order ``top_index`` yields their indices.
        """
        # Read the property once so the indices refer to the same sequence
        # that was ranked.
        sents = self.sentences
        rank = textrank.TextRank(self._sentence_tokens(sents))
        rank.solve()
        return [sents[index] for index in rank.top_index(limit)]

    def keywords(self, limit=5, merge=False):
        """Return the keywords KeywordTextRank selects for the document.

        :param limit: passed to ``KeywordTextRank.top_index``; presumably
            the maximum number of keywords returned.
        :param merge: when true, the keywords are handed together with the
            original document to ``words_merge.SimpleMerge`` and the result
            of its ``merge()`` is returned instead.
        :returns: list of keywords, or the ``SimpleMerge.merge()`` result
            when ``merge`` is true.
        """
        rank = textrank.KeywordTextRank(self._sentence_tokens(self.sentences))
        rank.solve()
        found = list(rank.top_index(limit))
        if merge:
            return words_merge.SimpleMerge(self.doc, found).merge()
        return found
|
||||
Reference in New Issue
Block a user