Module jpfreq.word
Word
Expand source code
"""
.. include:: ../../documentation/word.md
"""
from enum import Enum
from dataclasses import dataclass
from fugashi import UnidicNode
from .util import parse_pos_node, word_rep
class WordType(Enum):
    """
    The different attributes of a word.

    Each member's value is the Japanese part-of-speech tag string it stands
    for, so a member can be looked up from a raw tag with ``WordType(tag)``.
    Taken from: https://www.sketchengine.eu/tagset-jp-mecab/

    Members are intentionally left unannotated: at runtime every member is a
    ``WordType`` instance (its ``.value`` is the ``str``), so annotating the
    members as ``str`` would be incorrect and is rejected by type checkers.
    """

    # NOUNS
    NOUN = "名詞"
    NOUN_LIKE = "名詞的"
    COMMON_NOUN = "普通名詞"
    PRONOUN = "代名詞"
    PROPER_NOUN = "固有名詞"
    AUXILIARY_NOUN = "助動詞語幹"
    NAME = "人名"
    FIRST_NAME = "名"
    FAMILY_NAME = "姓"
    PLACE_NAME = "地名"
    COUNTRY = "国"

    # VERBS
    VERB = "動詞"
    AUXILIARY_VERB = "助動詞"
    VERBAL_SURU = "サ変可能"
    ADVERB = "副詞"
    ADVERBIAL = "副詞可能"
    VERBAL = "動詞的"
    VERBAL_ADJECTIVAL = "サ変形状詞可能"

    # PARTICLES
    PARTICLE = "助詞"
    CASE_PARTICLE = "格助詞"
    BINDING_PARTICLE = "係助詞"
    ADVERBIAL_PARTICLE = "副助詞"
    CONJUNCTIVE_PARTICLE = "接続助詞"
    QUOTATIVE_PARTICLE = "引用助詞"
    NOMINAL_PARTICLE = "準体助詞"
    PHRASE_FINAL_PARTICLE = "終助詞"

    # ADJECTIVES
    NA_ADJECTIVE = "形状詞"  # Adjectival Noun
    NA_ADJECTIVE_LIKE = "形状詞的"
    POTENTIAL_NA_ADJECTIVE = "形状詞可能"
    I_ADJECTIVE = "形容詞"
    I_ADJECTIVE_LIKE = "形容詞的"
    POTENTIAL_I_ADJECTIVE = "形容詞可能"

    # NUMBERS
    NUMERAL = "数詞"
    COUNTER = "助数詞"
    POSSIBLE_COUNTER = "助数詞可能"

    # SUFFIX / PREFIX
    SUFFIX = "接尾辞"
    PREFIX = "接頭辞"

    # SYMBOLS
    SYMBOL = "記号"
    ASCII_ART = "AA"
    EMOTICON = "顔文字"
    PERIOD = "句点"
    BRACKET_OPEN = "括弧開"
    BRACKET_CLOSE = "括弧閉"
    COMMA = "読点"
    CHARACTER = "文字"
    SUPPLEMENTARY_SYMBOL = "補助記号"

    # OTHER
    GENERAL = "一般"
    FILLER = "フィラー"
    TARI = "タリ"
    INTERJECTION = "感動詞"
    BLANK_SPACE = "空白"
    UNINDEPENDENT = "非自立可能"
    ADNOMINAL = "連体詞"
    CONJUNCTION = "接続詞"
    UNKNOWN = ""
@dataclass()
class Word:
    """
    A word together with its part-of-speech tags and a frequency counter.

    Attributes
    ----------
    representation : str
        When built through `from_node`, the value `word_rep` returns for the
        node.
    surface : str
        When built through `from_node`, the node's ``surface`` attribute.
    types : list[WordType]
        The part-of-speech tags attached to the word.
    frequency : int
        Defaults to 1. Presumably an occurrence count maintained by callers;
        nothing in this class updates it -- TODO confirm against callers.
    """

    representation: str
    surface: str
    types: list[WordType]
    frequency: int = 1

    @classmethod
    def from_node(cls, node: UnidicNode) -> "Word":
        """
        Creates a Word from a UnidicNode.

        Implemented as a classmethod so that calling it on a subclass builds
        an instance of that subclass; ``Word.from_node(node)`` is unchanged.

        Parameters
        ----------
        node : UnidicNode
            The node to create the word from.

        Returns
        -------
        Word
            The created word, with ``frequency`` set to 1.

        Raises
        ------
        ValueError
            If `parse_pos_node` yields a tag that is not the value of any
            `WordType` member.
        """
        return cls(
            representation=word_rep(node),
            surface=node.surface,
            # Look each raw tag up by value; an unrecognised tag raises.
            types=[WordType(pos_tag) for pos_tag in parse_pos_node(node.pos)],
            frequency=1,
        )

    def to_dict(self) -> dict:
        """
        Converts the word to a dictionary.

        Returns
        -------
        dict
            The dictionary representation of the word. ``types`` is emitted
            as a list of the members' string values rather than the enum
            members themselves.
        """
        return {
            "representation": self.representation,
            "surface": self.surface,
            "types": [word_type.value for word_type in self.types],
            "frequency": self.frequency,
        }
Classes
class Word (representation: str, surface: str, types: list[WordType], frequency: int = 1)-
Word(representation: str, surface: str, types: list[jpfreq.word.WordType], frequency: int = 1)
Expand source code
@dataclass() class Word: representation: str surface: str types: list[WordType] frequency: int = 1 @staticmethod def from_node(node: UnidicNode) -> "Word": """ Creates a Word from a UnidicNode. Parameters ---------- node : UnidicNode The node to create the word from. Returns ------- Word The created word. """ return Word( representation=word_rep(node), surface=node.surface, types=[WordType(word_type) for word_type in parse_pos_node(node.pos)], frequency=1, ) def to_dict(self): """ Converts the word to a dictionary. Returns ------- dict The dictionary representation of the word. """ return { "representation": self.representation, "surface": self.surface, "types": [word_type.value for word_type in self.types], "frequency": self.frequency, }Class variables
var frequency : int
var representation : str
var surface : str
var types : list[WordType]
Static methods
def from_node(node: fugashi.fugashi.UnidicNode) ‑> Word-
Creates a Word from a UnidicNode. Parameters
node:UnidicNode- The node to create the word from.
Returns
Word- The created word.
Expand source code
@staticmethod def from_node(node: UnidicNode) -> "Word": """ Creates a Word from a UnidicNode. Parameters ---------- node : UnidicNode The node to create the word from. Returns ------- Word The created word. """ return Word( representation=word_rep(node), surface=node.surface, types=[WordType(word_type) for word_type in parse_pos_node(node.pos)], frequency=1, )
Methods
def to_dict(self)-
Converts the word to a dictionary. Returns
dict- The dictionary representation of the word.
Expand source code
def to_dict(self): """ Converts the word to a dictionary. Returns ------- dict The dictionary representation of the word. """ return { "representation": self.representation, "surface": self.surface, "types": [word_type.value for word_type in self.types], "frequency": self.frequency, }
class WordType (value, names=None, *, module=None, qualname=None, type=None, start=1)-
The different attributes of a word.
Taken from: https://www.sketchengine.eu/tagset-jp-mecab/
Expand source code
class WordType(Enum): """ The different attributes of a word. Taken from: https://www.sketchengine.eu/tagset-jp-mecab/ """ # NOUNS NOUN: str = "名詞" NOUN_LIKE: str = "名詞的" COMMON_NOUN: str = "普通名詞" PRONOUN: str = "代名詞" PROPER_NOUN: str = "固有名詞" AUXILIARY_NOUN: str = "助動詞語幹" NAME: str = "人名" FIRST_NAME: str = "名" FAMILY_NAME: str = "姓" PLACE_NAME: str = "地名" COUNTRY: str = "国" # VERBS VERB: str = "動詞" AUXILIARY_VERB: str = "助動詞" VERBAL_SURU: str = "サ変可能" ADVERB: str = "副詞" ADVERBIAL: str = "副詞可能" VERBAL: str = "動詞的" VERBAL_ADJECTIVAL: str = "サ変形状詞可能" # PARTICLES PARTICLE: str = "助詞" CASE_PARTICLE: str = "格助詞" BINDING_PARTICLE: str = "係助詞" ADVERBIAL_PARTICLE: str = "副助詞" CONJUNCTIVE_PARTICLE: str = "接続助詞" QUOTATIVE_PARTICLE: str = "引用助詞" NOMINAL_PARTICLE: str = "準体助詞" PHRASE_FINAL_PARTICLE: str = "終助詞" # ADJECTIVES NA_ADJECTIVE: str = "形状詞" # Adjectival Noun NA_ADJECTIVE_LIKE: str = "形状詞的" POTENTIAL_NA_ADJECTIVE: str = "形状詞可能" I_ADJECTIVE: str = "形容詞" I_ADJECTIVE_LIKE: str = "形容詞的" POTENTIAL_I_ADJECTIVE: str = "形容詞可能" # NUMBERS NUMERAL: str = "数詞" COUNTER: str = "助数詞" POSSIBLE_COUNTER: str = "助数詞可能" # SUFFIX / PREFIX SUFFIX: str = "接尾辞" PREFIX: str = "接頭辞" # SYMBOLS SYMBOL: str = "記号" ASCII_ART: str = "AA" EMOTICON: str = "顔文字" PERIOD: str = "句点" BRACKET_OPEN: str = "括弧開" BRACKET_CLOSE: str = "括弧閉" COMMA: str = "読点" CHARACTER: str = "文字" SUPPLEMENTARY_SYMBOL: str = "補助記号" # OTHER GENERAL: str = "一般" FILLER: str = "フィラー" TARI: str = "タリ" INTERJECTION: str = "感動詞" BLANK_SPACE: str = "空白" UNINDEPENDENT: str = "非自立可能" ADNOMINAL: str = "連体詞" CONJUNCTION: str = "接続詞" UNKNOWN: str = ""Ancestors
- enum.Enum
Class variables
var ADNOMINAL : str
var ADVERB : str
var ADVERBIAL : str
var ADVERBIAL_PARTICLE : str
var ASCII_ART : str
var AUXILIARY_NOUN : str
var AUXILIARY_VERB : str
var BINDING_PARTICLE : str
var BLANK_SPACE : str
var BRACKET_CLOSE : str
var BRACKET_OPEN : str
var CASE_PARTICLE : str
var CHARACTER : str
var COMMA : str
var COMMON_NOUN : str
var CONJUNCTION : str
var CONJUNCTIVE_PARTICLE : str
var COUNTER : str
var COUNTRY : str
var EMOTICON : str
var FAMILY_NAME : str
var FILLER : str
var FIRST_NAME : str
var GENERAL : str
var INTERJECTION : str
var I_ADJECTIVE : str
var I_ADJECTIVE_LIKE : str
var NAME : str
var NA_ADJECTIVE : str
var NA_ADJECTIVE_LIKE : str
var NOMINAL_PARTICLE : str
var NOUN : str
var NOUN_LIKE : str
var NUMERAL : str
var PARTICLE : str
var PERIOD : str
var PHRASE_FINAL_PARTICLE : str
var PLACE_NAME : str
var POSSIBLE_COUNTER : str
var POTENTIAL_I_ADJECTIVE : str
var POTENTIAL_NA_ADJECTIVE : str
var PREFIX : str
var PRONOUN : str
var PROPER_NOUN : str
var QUOTATIVE_PARTICLE : str
var SUFFIX : str
var SUPPLEMENTARY_SYMBOL : str
var SYMBOL : str
var TARI : str
var UNINDEPENDENT : str
var UNKNOWN : str
var VERB : str
var VERBAL : str
var VERBAL_ADJECTIVAL : str
var VERBAL_SURU : str