Module jpfreq.kanji

Kanji

Expand source code
"""
.. include:: ../../documentation/kanji.md
"""

from unicodedata import name as u_name
from dataclasses import dataclass

KANJI_UNICODE_NAME = "CJK UNIFIED IDEOGRAPH"
"""The text that the Unicode name of a kanji character contains.
Looked for in the result of the `u_name` function to decide whether a code point is a kanji character."""


@dataclass
class Kanji:
    """
    A dataclass pairing a single kanji character with its frequency.
    """

    representation: str
    """The kanji character itself, as text."""
    frequency: int
    """The frequency recorded for the kanji character."""


def is_character_kanji(input_character: str) -> bool:
    """
    Checks if the input character is a kanji character.

    Parameters
    ----------
    input_character : str
        The character to check. Must be a string of length 1.

    Returns
    -------
    bool
        Whether the character is a kanji character or not.
        Characters that have no Unicode name (for example control
        characters such as a newline) are reported as not kanji.

    Raises
    ------
    TypeError
        If the input string is not exactly one character long.
    """
    if len(input_character) != 1:
        raise TypeError(
            f"is_character_kanji: Expected str of length 1, got length of {len(input_character)} instead"
        )

    # Without a default, `u_name` raises ValueError for code points that have
    # no name (e.g. "\n"), which would abort scanning of ordinary text.
    return KANJI_UNICODE_NAME in u_name(input_character, "")


def all_kanji_in_string(input_string: str) -> list[Kanji]:
    """
    Collects every kanji character found in the input string.

    Parameters
    ----------
    input_string : str
        The string to scan.

    Returns
    -------
    list[Kanji]
        One `Kanji` entry per kanji occurrence, in order of appearance.
        Every entry has a frequency of 1; repeated characters are not merged.
    """
    return [
        Kanji(symbol, 1)
        for symbol in input_string
        if is_character_kanji(symbol)
    ]

Global variables

var KANJI_UNICODE_NAME

The text contained in the Unicode name of every kanji character. Used with the u_name function to determine if a code point is a kanji character.

Functions

def all_kanji_in_string(input_string: str) ‑> list[Kanji]

Returns all kanji characters in the input string.

Parameters


input_string : str
The string to check.

Returns

list[Kanji]
A list of all kanji characters in the input string.
Expand source code
def all_kanji_in_string(input_string: str) -> list[Kanji]:
    """
    Collects every kanji character found in the input string.

    Parameters
    ----------
    input_string : str
        The string to scan.

    Returns
    -------
    list[Kanji]
        One `Kanji` entry per kanji occurrence, in order of appearance.
        Every entry has a frequency of 1; repeated characters are not merged.
    """
    return [
        Kanji(symbol, 1)
        for symbol in input_string
        if is_character_kanji(symbol)
    ]
def is_character_kanji(input_character: str) ‑> bool

Checks if the input character is a kanji character.

Parameters


input_character : str
The character to check.

Returns

bool
Whether the character is a kanji character or not.
Expand source code
def is_character_kanji(input_character: str) -> bool:
    """
    Checks if the input character is a kanji character.

    Parameters
    ----------
    input_character : str
        The character to check. Must be a string of length 1.

    Returns
    -------
    bool
        Whether the character is a kanji character or not.
        Characters that have no Unicode name (for example control
        characters such as a newline) are reported as not kanji.

    Raises
    ------
    TypeError
        If the input string is not exactly one character long.
    """
    if len(input_character) != 1:
        raise TypeError(
            f"is_character_kanji: Expected str of length 1, got length of {len(input_character)} instead"
        )

    # Without a default, `u_name` raises ValueError for code points that have
    # no name (e.g. "\n"), which would abort scanning of ordinary text.
    return KANJI_UNICODE_NAME in u_name(input_character, "")

Classes

class Kanji (representation: str, frequency: int)

A dataclass representing a kanji character.

Expand source code
@dataclass
class Kanji:
    """
    A dataclass pairing a single kanji character with its frequency.
    """

    representation: str
    """The kanji character itself, as text."""
    frequency: int
    """The frequency recorded for the kanji character."""

Class variables

var frequency : int

The frequency of the kanji character.

var representation : str

The text representation of the kanji character.