Skip to content

Commit 8db7d93

Browse files
committed
Reorganize.
1 parent 1608fb5 commit 8db7d93

File tree

11 files changed

+73
-28
lines changed

11 files changed

+73
-28
lines changed

examples/constrained_generation.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,8 @@
44
use candle_core::IntDType;
55
use std::fs;
66
use syncode_core::{
7-
bytes::restore_bytes,
8-
grammar::EBNFParser,
9-
mask::grammar_mask,
10-
mask_store,
11-
types::{Lexer, Parser},
7+
bytes::restore_bytes, grammar::EBNFParser, lexer::Lexer, mask::grammar_mask, mask_store,
8+
parser::Parser,
129
};
1310
use tokenizers::{Tokenizer, tokenizer};
1411

examples/construct_mask_store.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
11
use rayon::prelude::*;
2-
use serde_json::{de, ser};
3-
use std::collections::HashSet;
42
use std::fs;
53
use syncode_core::bytes::restore_bytes;
64
use syncode_core::grammar::EBNFParser;
75
use syncode_core::mask::{dfa_mask_store, grammar_mask};
8-
use syncode_core::types::{Lexer, Parser};
6+
use syncode_core::{lexer::Lexer, parser::Parser};
97
use tokenizers::Tokenizer;
108

119
fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {

src/generate.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@ use tokenizers::Tokenizer;
99

1010
use crate::{
1111
bytes::restore_bytes,
12+
grammar::Grammar,
1213
lexer::Lexer,
1314
mask::{DFAMaskStore, grammar_mask},
14-
types::{Parser},
15-
grammar::Grammar
15+
parser::Parser,
1616
};
1717

1818
/// Generate a sequence constrained by a given grammar.

src/lib.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@
99
use bytes::restore_bytes;
1010
use grammar::EBNFParser;
1111
use mask::{DFAMaskStore, dfa_mask_store};
12+
use parser::Parser;
1213
use std::fs;
1314
use tokenizers::Tokenizer;
14-
use types::Parser;
1515

1616
pub mod bytes;
1717
pub mod dfa;
@@ -24,7 +24,6 @@ pub mod production;
2424
pub mod table;
2525
pub mod terminal;
2626
pub mod token;
27-
pub mod types;
2827

2928
pub fn mask_store(model_id: &str, grammar_file: &str) -> DFAMaskStore {
3029
let tokenizer = Tokenizer::from_pretrained(model_id, None).unwrap();

src/mask.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
44
use crate::Parser;
55
use crate::dfa::all_dfa_states;
6-
use crate::token::Token;
7-
use crate::terminal::Terminal;
86
use crate::grammar::Grammar;
7+
use crate::terminal::Terminal;
8+
use crate::token::Token;
99

1010
use core::iter::Iterator;
1111
use regex_automata::dfa::dense;

src/parser.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@ use std::cell::LazyCell;
66
use std::collections::HashSet;
77
use std::fmt;
88

9-
use crate::table::{LRTables, ActionTable, GotoTable, Action};
9+
use crate::grammar::Grammar;
10+
use crate::table::{Action, ActionTable, GotoTable, LRTables};
1011
use crate::terminal::Terminal;
1112
use crate::token::Token;
12-
use crate::grammar::Grammar;
1313

1414
/// The Parser with its tables.
1515
///
@@ -40,7 +40,6 @@ impl Grammar {
4040
}
4141
}
4242

43-
4443
/// A special empty token for convenience.
4544
const EMPTY_TOKEN: LazyCell<Token> = LazyCell::new(|| Token {
4645
value: [].into(),
@@ -353,10 +352,9 @@ mod tests {
353352
extern crate test;
354353
use std::collections::{HashMap, HashSet};
355354

356-
use std::fs;
357-
358355
use super::*;
359-
use crate::grammar::EBNFParser;
356+
use crate::lexer::Lexer;
357+
use crate::production::Production;
360358
// Terminal definitions to be used throughout tests. Commented out ones may
361359
// come in handy in future tests but are commented to avoid dead code warnings.
362360
fn word() -> Terminal {

src/production.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
use std::sync::Arc;
2+
3+
/// A single production of the grammar.
4+
#[derive(Clone, Debug, Hash, Eq, PartialEq)]
5+
pub struct Production {
6+
/// The left hand side of the production.
7+
pub lhs: Arc<String>,
8+
/// The right hand side of the production.
9+
pub rhs: Arc<Vec<String>>,
10+
// The priority of this production. Used to resolve conflicts when constructing the table.
11+
// pub priority: i32,
12+
}
13+
14+
impl Production {
15+
/// Convenience constructor for tests.
16+
pub fn new(lhs: &str, rhs: Vec<&str>) -> Production {
17+
Production {
18+
lhs: lhs.to_string().into(),
19+
rhs: rhs
20+
.iter()
21+
.map(|s| s.to_string())
22+
.collect::<Vec<String>>()
23+
.into(),
24+
}
25+
}
26+
}

src/table.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,10 @@
1010
//! is work to be done here in cleaning the code and improving its efficiency;
1111
//! right now the goal is functioning code, nothing more, nothing less.
1212
13-
use std::collections::{HashMap, HashSet};
14-
use crate::production::Production;
1513
use crate::grammar::Grammar;
14+
use crate::production::Production;
15+
use std::collections::{HashMap, HashSet};
16+
1617
use rayon::prelude::*;
1718

1819
/// An item of the item set for LR parsing.
@@ -45,8 +46,6 @@ pub type ActionTable = HashMap<(usize, String), Action>;
4546
/// A goto table is a map from a (state_id, nonterminal) pair to a state_id.
4647
pub type GotoTable = HashMap<(usize, String), usize>;
4748

48-
49-
5049
pub const AUGMENTED_START_SYMBOL: &str = "supersecretnewstart";
5150

5251
/// Hold state in order to save repeat computations.
@@ -436,9 +435,8 @@ impl LRTables {
436435

437436
#[cfg(test)]
438437
mod tests {
439-
use std::fs;
440-
441438
use super::*;
439+
use crate::terminal::Terminal;
442440
use Action::*;
443441

444442
/// (4.55) from the Dragon Book 2e, section 4.7.2, p. 263.

src/token.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
use crate::terminal::Terminal;
2+
use std::sync::Arc;
3+
4+
/// A lexical token, what the lexer breaks the input into.
5+
#[derive(Clone, Debug, PartialEq)]
6+
pub struct Token {
7+
/// The content of the token.
8+
pub value: Arc<[u8]>,
9+
/// The type of terminal that this is in the grammar. None if this token
10+
/// couldn't be lexed, which can happen in the case that this is the
11+
/// unlexable remainder.
12+
pub terminal: Option<Terminal>,
13+
/// Where in the input the token begins.
14+
pub start_pos: usize,
15+
/// Where in the input the token ends.
16+
pub end_pos: usize,
17+
/// The line of the input the token begins on.
18+
pub line: usize,
19+
/// The line of the input the token ends on.
20+
pub end_line: usize,
21+
/// The column of the input the token begins on.
22+
pub column: usize,
23+
/// The column of the input the token ends on.
24+
pub end_column: usize,
25+
}

tests/generation.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,11 @@ use syncode_core::bytes::restore_bytes;
77
use syncode_core::grammar::EBNFParser;
88
use syncode_core::mask::{dfa_mask_store, grammar_mask};
99
use syncode_core::mask_store;
10-
use syncode_core::types::*;
10+
use syncode_core::parser::Parser;
11+
use syncode_core::lexer::Lexer;
12+
use syncode_core::grammar::Grammar;
13+
use syncode_core::production::Production;
14+
use syncode_core::terminal::Terminal;
1115
use tokenizers::Tokenizer;
1216

1317
#[bench]

0 commit comments

Comments (0)