Module jpfreq.util
Utility
Expand source code
"""
.. include:: ../../documentation/util.md
"""
from fugashi import UnidicNode
def percent_of(part: [int | float], total: [int | float]) -> float:
"""
Gets the part of the total as a percentage.
Parameters
----------
part : [int | float]
Fraction of the total.
total : [int | float]
Total value.
Returns
-------
float
Part percent of total.
"""
if total == 0:
return 0
return (part / total) * 100
def parse_pos_node(pos: str) -> list[str]:
    """
    Breaks a fugashi POS string into its individual POS values.

    The string is split on commas and every "*" entry is discarded.

    Parameters
    ----------
    pos : str
        Comma separated POS values, e.g. "名詞,一般,*,*".

    Returns
    -------
    list[str]
        The POS values in their original order, without "*" entries.
        An empty input string yields an empty list.
    """
    # "".split(",") gives [""], so the empty string is handled up front.
    if pos == "":
        return []

    values: list[str] = []
    for entry in pos.split(","):
        if entry != "*":
            values.append(entry)
    return values
def word_rep(word: UnidicNode) -> str:
    """
    Gets the string representation of a UnidicNode.

    This is the lemma of the word up to its first "-". The surface form is
    used instead when the node has no lemma or when that prefix is empty.

    Parameters
    ----------
    word : UnidicNode
        The word to get the representation of.

    Returns
    -------
    str
        The string representation of the word.
    """
    lemma = word.feature.lemma
    if lemma is None:
        return word.surface

    # Keep only the text before the first "-".
    # NOTE(review): presumably this strips a UniDic disambiguation suffix - confirm.
    head = lemma.partition("-")[0]

    # An empty lemma, or one starting with "-", would yield "" here, so fall
    # back to the surface form rather than returning an empty representation.
    return head or word.surface
def in_range(value: int, minimum: int, maximum: int) -> bool:
    """
    Checks if a value is in a range.

    Parameters
    ----------
    value : int
        The value to check.
    minimum : int
        The minimum value (inclusive). -1 for no minimum.
    maximum : int
        The maximum value (inclusive). -1 for no maximum.

    Returns
    -------
    bool
        Whether the value is in the range or not.
    """
    # -1 is the sentinel that switches a bound off entirely.
    if minimum != -1 and value < minimum:
        return False
    if maximum != -1 and value > maximum:
        return False
    return True
Functions
def in_range(value: int, minimum: int, maximum: int)
-
Checks if a value is in a range. Parameters
value
:int
- The value to check.
minimum
:int
- The minimum value (inclusive). -1 for no minimum.
maximum
:int
- The maximum value (inclusive). -1 for no maximum.
Returns
bool
- Whether the value is in the range or not.
Expand source code
def in_range(value: int, minimum: int, maximum: int) -> bool:
    """
    Checks if a value is in a range.

    Parameters
    ----------
    value : int
        The value to check.
    minimum : int
        The minimum value (inclusive). -1 for no minimum.
    maximum : int
        The maximum value (inclusive). -1 for no maximum.

    Returns
    -------
    bool
        Whether the value is in the range or not.
    """
    # -1 is the sentinel that switches a bound off entirely.
    if minimum != -1 and value < minimum:
        return False
    if maximum != -1 and value > maximum:
        return False
    return True
def parse_pos_node(pos: str) ‑> list[str]
-
Parses a POS node from fugashi into a list of POS values. This is essentially a wrapper around str.split(",") that gets the
WordType
(s) from the POS value. Parameters
pos
:str
- The POS node to parse. This is a comma separated string of POS values. e.g. "名詞,一般,*,*"
Returns
list[str]
- A list of POS values.
Expand source code
def parse_pos_node(pos: str) -> list[str]:
    """
    Breaks a fugashi POS string into its individual POS values.

    The string is split on commas and every "*" entry is discarded.

    Parameters
    ----------
    pos : str
        Comma separated POS values, e.g. "名詞,一般,*,*".

    Returns
    -------
    list[str]
        The POS values in their original order, without "*" entries.
        An empty input string yields an empty list.
    """
    # "".split(",") gives [""], so the empty string is handled up front.
    if pos == "":
        return []

    values: list[str] = []
    for entry in pos.split(","):
        if entry != "*":
            values.append(entry)
    return values
def percent_of(part: [int | float], total: [int | float]) ‑> float
-
Gets the part of the total as a percentage. Parameters
part
:[int | float]
- Fraction of the total.
total
:[int | float]
- Total value.
Returns
float
- Part percent of total.
Expand source code
def percent_of(part: [int | float], total: [int | float]) -> float: """ Gets the part of the total as a percentage. Parameters ---------- part : [int | float] Fraction of the total. total : [int | float] Total value. Returns ------- float Part percent of total. """ if total == 0: return 0 return (part / total) * 100
def word_rep(word: fugashi.fugashi.UnidicNode)
-
Gets the string representation of a UnidicNode. This is the lemma of the word.
Parameters
word
:UnidicNode
- The word to get the representation of.
Returns
str
- The string representation of the word.
Expand source code
def word_rep(word: UnidicNode) -> str:
    """
    Gets the string representation of a UnidicNode.

    This is the lemma of the word up to its first "-". The surface form is
    used instead when the node has no lemma or when that prefix is empty.

    Parameters
    ----------
    word : UnidicNode
        The word to get the representation of.

    Returns
    -------
    str
        The string representation of the word.
    """
    lemma = word.feature.lemma
    if lemma is None:
        return word.surface

    # Keep only the text before the first "-".
    # NOTE(review): presumably this strips a UniDic disambiguation suffix - confirm.
    head = lemma.partition("-")[0]

    # An empty lemma, or one starting with "-", would yield "" here, so fall
    # back to the surface form rather than returning an empty representation.
    return head or word.surface