lindera
diff --git a/‎Cargo.toml‎
Lines changed: 1 addition & 0 deletions b/‎Cargo.toml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/dictionary.rs‎
Lines changed: 153 additions & 0 deletions b/‎src/dictionary.rs‎
Lines changed: 153 additions & 0 deletions
diff --git a/‎src/error.rs‎
Lines changed: 8 additions & 0 deletions b/‎src/error.rs‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎src/lib.rs‎
Lines changed: 39 additions & 0 deletions b/‎src/lib.rs‎
Lines changed: 39 additions & 0 deletions
diff --git a/‎src/metadata.rs‎
Lines changed: 47 additions & 0 deletions b/‎src/metadata.rs‎
Lines changed: 47 additions & 0 deletions
@@ -45,3 +45,4 @@ serde_json = "1.0.145"
 num_cpus = "1.17.0"
 
 lindera = "1.4.2"
+lindera = "1.4.2"
@@ -1,3 +1,29 @@
+//! Dictionary management for morphological analysis.
+//!
+//! This module provides functionality for building, loading, and managing dictionaries
+//! used in morphological analysis.
+//!
+//! # Dictionary Types
+//!
+//! - **Dictionary**: Main dictionary for morphological analysis
+//! - **UserDictionary**: Custom user-defined dictionary for additional words
+//!
+//! # Examples
+//!
+//! ```python
+//! import lindera
+//!
+//! # Load a pre-built dictionary
+//! dictionary = lindera.load_dictionary("ipadic")
+//!
+//! # Build a custom dictionary
+//! metadata = lindera.Metadata()
+//! lindera.build_dictionary("/path/to/input", "/path/to/output", metadata)
+//!
+//! # Build a user dictionary
+//! lindera.build_user_dictionary("ipadic", "user.csv", "/path/to/output")
+//! ```
+
 use std::path::Path;
 
 use pyo3::{exceptions::PyValueError, prelude::*};
@@ -10,6 +36,20 @@ use lindera::dictionary::{
 
 use crate::metadata::PyMetadata;
 
+/// A morphological analysis dictionary.
+///
+/// Contains the data structures needed for tokenization and morphological analysis.
+///
+/// # Examples
+///
+/// ```python
+/// # Load a dictionary
+/// dictionary = lindera.load_dictionary("ipadic")
+///
+/// # Access metadata
+/// print(dictionary.metadata_name())
+/// print(dictionary.metadata_encoding())
+/// ```
 #[pyclass(name = "Dictionary")]
 #[derive(Clone)]
 pub struct PyDictionary {
@@ -18,14 +58,17 @@ pub struct PyDictionary {
 
 #[pymethods]
 impl PyDictionary {
+    /// Returns the name of the dictionary metadata.
     pub fn metadata_name(&self) -> String {
         // Hand Python an owned copy; the dictionary keeps its own value.
         let name = &self.inner.metadata.name;
         name.clone()
     }
 
+    /// Returns the character encoding of the dictionary.
     pub fn metadata_encoding(&self) -> String {
         // Borrow the metadata once, then copy out only the encoding field.
         let metadata = &self.inner.metadata;
         metadata.encoding.clone()
     }
 
+    /// Returns the full metadata object of the dictionary.
     pub fn metadata(&self) -> PyMetadata {
         // Clone the inner metadata so the returned wrapper is independent of
         // this dictionary, then convert it into the Python-facing type.
         let snapshot = self.inner.metadata.clone();
         PyMetadata::from(snapshot)
     }
@@ -46,6 +89,21 @@ impl PyDictionary {
     }
 }
 
+/// A user-defined dictionary for custom words.
+///
+/// User dictionaries allow you to add custom words and their morphological features
+/// that are not present in the main dictionary.
+///
+/// # Examples
+///
+/// ```python
+/// # Build a user dictionary
+/// lindera.build_user_dictionary("ipadic", "user.csv", "/path/to/output")
+///
+/// # Load it
+/// metadata = lindera.Metadata()
+/// user_dict = lindera.load_user_dictionary("/path/to/output", metadata)
+/// ```
 #[pyclass(name = "UserDictionary")]
 #[derive(Clone)]
 pub struct PyUserDictionary {
@@ -72,6 +130,24 @@ impl PyUserDictionary {
     }
 }
 
+/// Builds a dictionary from source files.
+///
+/// # Arguments
+///
+/// * `input_dir` - Directory containing dictionary source files.
+/// * `output_dir` - Directory where the built dictionary will be saved.
+/// * `metadata` - Metadata configuration for the dictionary.
+///
+/// # Errors
+///
+/// Returns an error if the input directory doesn't exist or if the build fails.
+///
+/// # Examples
+///
+/// ```python
+/// metadata = lindera.Metadata(name="custom", encoding="UTF-8")
+/// lindera.build_dictionary("/path/to/input", "/path/to/output", metadata)
+/// ```
 #[pyfunction]
 #[pyo3(signature = (input_dir, output_dir, metadata))]
 pub fn build_dictionary(input_dir: &str, output_dir: &str, metadata: PyMetadata) -> PyResult<()> {
@@ -93,6 +169,34 @@ pub fn build_dictionary(input_dir: &str, output_dir: &str, metadata: PyMetadata)
     Ok(())
 }
 
+/// Builds a user dictionary from a CSV file.
+///
+/// # Arguments
+///
+/// * `_kind` - Dictionary kind (currently unused, reserved for future use).
+/// * `input_file` - Path to the CSV file containing user dictionary entries.
+/// * `output_dir` - Directory where the built user dictionary will be saved.
+/// * `metadata` - Optional metadata configuration. If None, default values are used.
+///
+/// # CSV Format
+///
+/// The CSV file should contain entries in the format specified by the dictionary schema.
+/// Typically: surface,reading,pronunciation
+///
+/// # Errors
+///
+/// Returns an error if the input file doesn't exist or if the build fails.
+///
+/// # Examples
+///
+/// ```python
+/// # Build with default metadata
+/// lindera.build_user_dictionary("ipadic", "user.csv", "/path/to/output")
+///
+/// # Build with custom metadata
+/// metadata = lindera.Metadata()
+/// lindera.build_user_dictionary("ipadic", "user.csv", "/path/to/output", metadata)
+/// ```
 #[pyfunction]
 #[pyo3(signature = (_kind, input_file, output_dir, metadata=None))]
 pub fn build_user_dictionary(
@@ -129,6 +233,34 @@ pub fn build_user_dictionary(
     Ok(())
 }
 
+/// Loads a dictionary from the specified URI.
+///
+/// # Arguments
+///
+/// * `uri` - URI to the dictionary. Can be a file path or embedded dictionary name.
+///
+/// # Supported URIs
+///
+/// - File paths: `/path/to/dictionary`
+/// - Embedded dictionaries: `ipadic`, `unidic`, `ko-dic`, `cc-cedict`
+///
+/// # Returns
+///
+/// A loaded `Dictionary` object.
+///
+/// # Errors
+///
+/// Returns an error if the dictionary cannot be loaded from the specified URI.
+///
+/// # Examples
+///
+/// ```python
+/// # Load an embedded dictionary
+/// dictionary = lindera.load_dictionary("ipadic")
+///
+/// # Load from file path
+/// dictionary = lindera.load_dictionary("/path/to/dictionary")
+/// ```
 #[pyfunction]
 #[pyo3(signature = (uri))]
 pub fn load_dictionary(uri: &str) -> PyResult<PyDictionary> {
@@ -137,6 +269,27 @@ pub fn load_dictionary(uri: &str) -> PyResult<PyDictionary> {
         .map(PyDictionary::new)
 }
 
+/// Loads a user dictionary from the specified URI.
+///
+/// # Arguments
+///
+/// * `uri` - URI to the user dictionary directory.
+/// * `metadata` - Metadata configuration for the user dictionary.
+///
+/// # Returns
+///
+/// A loaded `UserDictionary` object.
+///
+/// # Errors
+///
+/// Returns an error if the user dictionary cannot be loaded.
+///
+/// # Examples
+///
+/// ```python
+/// metadata = lindera.Metadata()
+/// user_dict = lindera.load_user_dictionary("/path/to/user_dict", metadata)
+/// ```
 #[pyfunction]
 #[pyo3(signature = (uri, metadata))]
 pub fn load_user_dictionary(uri: &str, metadata: PyMetadata) -> PyResult<PyUserDictionary> {
 
@@ -1,8 +1,16 @@
+//! Error types for Lindera operations.
+//!
+//! This module provides error types used throughout the Lindera Python bindings.
+
 use std::fmt;
 
 use pyo3::exceptions::PyException;
 use pyo3::prelude::*;
 
+/// Error type for Lindera operations.
+///
+/// Represents errors that can occur during tokenization, dictionary operations,
+/// or other Lindera functionality.
 #[pyclass(name = "LinderaError")]
 #[derive(Debug, Clone)]
 pub struct PyLinderaError {
 
@@ -1,3 +1,34 @@
+//! # Lindera Python Bindings
+//!
+//! Python bindings for [Lindera](https://github.com/lindera/lindera), a morphological analysis library for CJK text.
+//!
+//! Lindera provides high-performance tokenization and morphological analysis for:
+//! - Japanese (IPADIC, IPADIC NEologd, UniDic)
+//! - Korean (ko-dic)
+//! - Chinese (CC-CEDICT)
+//!
+//! ## Features
+//!
+//! - **Dictionary management**: Build, load, and use custom dictionaries
+//! - **Tokenization**: Multiple tokenization modes (normal, decompose)
+//! - **Filters**: Character and token filtering pipeline
+//! - **Training**: Train custom morphological models (with `train` feature)
+//! - **User dictionaries**: Support for custom user dictionaries
+//!
+//! ## Examples
+//!
+//! ```python
+//! import lindera
+//!
+//! # Create a tokenizer
+//! tokenizer = lindera.TokenizerBuilder().build()
+//!
+//! # Tokenize text
+//! tokens = tokenizer.tokenize("関西国際空港")
+//! for token in tokens:
+//!     print(token["text"], token["detail"])
+//! ```
+
 pub mod dictionary;
 pub mod error;
 pub mod metadata;
@@ -17,11 +48,19 @@ use crate::mode::{PyMode, PyPenalty};
 use crate::schema::{PyFieldDefinition, PyFieldType, PySchema};
 use crate::tokenizer::{PyTokenizer, PyTokenizerBuilder};
 
+/// Returns the version of the lindera-python package.
+///
+/// # Returns
+///
+/// Version string taken from the crate's `CARGO_PKG_VERSION` (e.g. "major.minor.patch").
 #[pyfunction]
 pub fn version() -> String {
     // `CARGO_PKG_VERSION` is resolved at compile time, so this only copies a
     // static string into an owned `String` for the Python side.
     String::from(env!("CARGO_PKG_VERSION"))
 }
 
+/// Python module definition for lindera.
+///
+/// This module exports all classes and functions available to Python code.
 #[pymodule]
 fn lindera(module: &Bound<'_, PyModule>) -> PyResult<()> {
     module.add_class::<PyDictionary>()?;
 
@@ -1,3 +1,25 @@
+//! Dictionary metadata configuration.
+//!
+//! This module provides structures for configuring dictionary metadata, including
+//! compression algorithms, character encodings, and schema definitions.
+//!
+//! # Examples
+//!
+//! ```python
+//! # Create metadata with default values
+//! metadata = lindera.Metadata()
+//!
+//! # Create metadata with custom values
+//! metadata = lindera.Metadata(
+//!     name="custom_dict",
+//!     encoding="UTF-8",
+//!     compress_algorithm=lindera.CompressionAlgorithm.Deflate
+//! )
+//!
+//! # Load metadata from JSON
+//! metadata = lindera.Metadata.from_json_file("metadata.json")
+//! ```
+
 use std::collections::HashMap;
 
 use pyo3::prelude::*;
@@ -6,12 +28,19 @@ use lindera::dictionary::{CompressionAlgorithm, Metadata};
 
 use crate::schema::PySchema;
 
+/// Compression algorithm for dictionary data.
+///
+/// Determines how dictionary data is compressed when saved to disk; exposed to Python as `CompressionAlgorithm`.
 #[pyclass(name = "CompressionAlgorithm")]
 #[derive(Debug, Clone)]
 pub enum PyCompressionAlgorithm {
+    /// DEFLATE compression.
     Deflate,
+    /// Zlib compression.
     Zlib,
+    /// Gzip compression.
     Gzip,
+    /// No compression (data is stored raw).
     Raw,
 }
 
@@ -53,6 +82,24 @@ impl From<CompressionAlgorithm> for PyCompressionAlgorithm {
     }
 }
 
+/// Dictionary metadata configuration.
+///
+/// Contains all configuration parameters for building and using dictionaries.
+///
+/// # Fields
+///
+/// * `name` - Dictionary name
+/// * `encoding` - Character encoding (default: "UTF-8")
+/// * `compress_algorithm` - Compression algorithm (default: Deflate)
+/// * `default_word_cost` - Default cost for unknown words (default: -10000)
+/// * `default_left_context_id` - Default left context ID (default: 1288)
+/// * `default_right_context_id` - Default right context ID (default: 1288)
+/// * `default_field_value` - Default value for missing fields (default: "*")
+/// * `flexible_csv` - Allow flexible CSV parsing (default: false)
+/// * `skip_invalid_cost_or_id` - Skip entries with invalid cost/ID (default: false)
+/// * `normalize_details` - Normalize morphological details (default: false)
+/// * `dictionary_schema` - Schema for main dictionary
+/// * `user_dictionary_schema` - Schema for user dictionary
 #[pyclass(name = "Metadata")]
 #[derive(Debug, Clone)]
 pub struct PyMetadata {
Original file line number	Diff line number	Diff line change
`@@ -45,3 +45,4 @@ serde_json = "1.0.145"`
`45`	`45`	`num_cpus = "1.17.0"`
`46`	`46`
`47`	`47`	`lindera = "1.4.2"`
	`48`	`+lindera = "1.4.2"`