1+ //! Dictionary management for morphological analysis.
2+ //!
3+ //! This module provides functionality for building, loading, and managing dictionaries
4+ //! used in morphological analysis.
5+ //!
6+ //! # Dictionary Types
7+ //!
8+ //! - **Dictionary**: Main dictionary for morphological analysis
9+ //! - **UserDictionary**: Custom user-defined dictionary for additional words
10+ //!
11+ //! # Examples
12+ //!
13+ //! ```python
14+ //! import lindera
15+ //!
16+ //! # Load a pre-built dictionary
17+ //! dictionary = lindera.load_dictionary("ipadic")
18+ //!
19+ //! # Build a custom dictionary
20+ //! metadata = lindera.Metadata()
21+ //! lindera.build_dictionary("/path/to/input", "/path/to/output", metadata)
22+ //!
23+ //! # Build a user dictionary
24+ //! lindera.build_user_dictionary("ipadic", "user.csv", "/path/to/output")
25+ //! ```
26+
127use std:: path:: Path ;
228
329use pyo3:: { exceptions:: PyValueError , prelude:: * } ;
@@ -10,6 +36,20 @@ use lindera::dictionary::{
1036
1137use crate :: metadata:: PyMetadata ;
1238
39+ /// A morphological analysis dictionary.
40+ ///
41+ /// Contains the data structures needed for tokenization and morphological analysis.
42+ ///
43+ /// # Examples
44+ ///
45+ /// ```python
46+ /// # Load a dictionary
47+ /// dictionary = lindera.load_dictionary("ipadic")
48+ ///
49+ /// # Access metadata
50+ /// print(dictionary.metadata_name())
51+ /// print(dictionary.metadata_encoding())
52+ /// ```
1353#[ pyclass( name = "Dictionary" ) ]
1454#[ derive( Clone ) ]
1555pub struct PyDictionary {
@@ -18,14 +58,17 @@ pub struct PyDictionary {
1858
1959#[ pymethods]
2060impl PyDictionary {
61+ /// Returns the name of the dictionary metadata.
2162 pub fn metadata_name ( & self ) -> String {
2263 self . inner . metadata . name . clone ( )
2364 }
2465
66+ /// Returns the character encoding of the dictionary.
2567 pub fn metadata_encoding ( & self ) -> String {
2668 self . inner . metadata . encoding . clone ( )
2769 }
2870
71+ /// Returns the full metadata object of the dictionary.
2972 pub fn metadata ( & self ) -> PyMetadata {
3073 PyMetadata :: from ( self . inner . metadata . clone ( ) )
3174 }
@@ -46,6 +89,21 @@ impl PyDictionary {
4689 }
4790}
4891
92+ /// A user-defined dictionary for custom words.
93+ ///
94+ /// User dictionaries allow you to add custom words and their morphological features
95+ /// that are not present in the main dictionary.
96+ ///
97+ /// # Examples
98+ ///
99+ /// ```python
100+ /// # Build a user dictionary
101+ /// lindera.build_user_dictionary("ipadic", "user.csv", "/path/to/output")
102+ ///
103+ /// # Load it
104+ /// metadata = lindera.Metadata()
105+ /// user_dict = lindera.load_user_dictionary("/path/to/output", metadata)
106+ /// ```
49107#[ pyclass( name = "UserDictionary" ) ]
50108#[ derive( Clone ) ]
51109pub struct PyUserDictionary {
@@ -72,6 +130,24 @@ impl PyUserDictionary {
72130 }
73131}
74132
133+ /// Builds a dictionary from source files.
134+ ///
135+ /// # Arguments
136+ ///
137+ /// * `input_dir` - Directory containing dictionary source files.
138+ /// * `output_dir` - Directory where the built dictionary will be saved.
139+ /// * `metadata` - Metadata configuration for the dictionary.
140+ ///
141+ /// # Errors
142+ ///
143+ /// Returns an error if the input directory doesn't exist or if the build fails.
144+ ///
145+ /// # Examples
146+ ///
147+ /// ```python
148+ /// metadata = lindera.Metadata(name="custom", encoding="UTF-8")
149+ /// lindera.build_dictionary("/path/to/input", "/path/to/output", metadata)
150+ /// ```
75151#[ pyfunction]
76152#[ pyo3( signature = ( input_dir, output_dir, metadata) ) ]
77153pub fn build_dictionary ( input_dir : & str , output_dir : & str , metadata : PyMetadata ) -> PyResult < ( ) > {
@@ -93,6 +169,34 @@ pub fn build_dictionary(input_dir: &str, output_dir: &str, metadata: PyMetadata)
93169 Ok ( ( ) )
94170}
95171
172+ /// Builds a user dictionary from a CSV file.
173+ ///
174+ /// # Arguments
175+ ///
176+ /// * `_kind` - Dictionary kind (currently unused, reserved for future use).
177+ /// * `input_file` - Path to the CSV file containing user dictionary entries.
178+ /// * `output_dir` - Directory where the built user dictionary will be saved.
179+ /// * `metadata` - Optional metadata configuration. If None, default values are used.
180+ ///
181+ /// # CSV Format
182+ ///
183+ /// The CSV file should contain entries in the format specified by the dictionary schema.
184+ /// Typically: surface,reading,pronunciation
185+ ///
186+ /// # Errors
187+ ///
188+ /// Returns an error if the input file doesn't exist or if the build fails.
189+ ///
190+ /// # Examples
191+ ///
192+ /// ```python
193+ /// # Build with default metadata
194+ /// lindera.build_user_dictionary("ipadic", "user.csv", "/path/to/output")
195+ ///
196+ /// # Build with custom metadata
197+ /// metadata = lindera.Metadata()
198+ /// lindera.build_user_dictionary("ipadic", "user.csv", "/path/to/output", metadata)
199+ /// ```
96200#[ pyfunction]
97201#[ pyo3( signature = ( _kind, input_file, output_dir, metadata=None ) ) ]
98202pub fn build_user_dictionary (
@@ -129,6 +233,34 @@ pub fn build_user_dictionary(
129233 Ok ( ( ) )
130234}
131235
236+ /// Loads a dictionary from the specified URI.
237+ ///
238+ /// # Arguments
239+ ///
/// * `uri` - URI to the dictionary. Can be a file path or the name of an embedded dictionary.
241+ ///
242+ /// # Supported URIs
243+ ///
244+ /// - File paths: `/path/to/dictionary`
245+ /// - Embedded dictionaries: `ipadic`, `unidic`, `ko-dic`, `cc-cedict`
246+ ///
247+ /// # Returns
248+ ///
249+ /// A loaded `Dictionary` object.
250+ ///
251+ /// # Errors
252+ ///
253+ /// Returns an error if the dictionary cannot be loaded from the specified URI.
254+ ///
255+ /// # Examples
256+ ///
257+ /// ```python
258+ /// # Load an embedded dictionary
/// dictionary = lindera.load_dictionary("ipadic")
///
/// # Load from a file path
/// dictionary = lindera.load_dictionary("/path/to/dictionary")
263+ /// ```
132264#[ pyfunction]
133265#[ pyo3( signature = ( uri) ) ]
134266pub fn load_dictionary ( uri : & str ) -> PyResult < PyDictionary > {
@@ -137,6 +269,27 @@ pub fn load_dictionary(uri: &str) -> PyResult<PyDictionary> {
137269 . map ( PyDictionary :: new)
138270}
139271
272+ /// Loads a user dictionary from the specified URI.
273+ ///
274+ /// # Arguments
275+ ///
276+ /// * `uri` - URI to the user dictionary directory.
277+ /// * `metadata` - Metadata configuration for the user dictionary.
278+ ///
279+ /// # Returns
280+ ///
281+ /// A loaded `UserDictionary` object.
282+ ///
283+ /// # Errors
284+ ///
285+ /// Returns an error if the user dictionary cannot be loaded.
286+ ///
287+ /// # Examples
288+ ///
289+ /// ```python
290+ /// metadata = lindera.Metadata()
291+ /// user_dict = lindera.load_user_dictionary("/path/to/user_dict", metadata)
292+ /// ```
140293#[ pyfunction]
141294#[ pyo3( signature = ( uri, metadata) ) ]
142295pub fn load_user_dictionary ( uri : & str , metadata : PyMetadata ) -> PyResult < PyUserDictionary > {
0 commit comments