Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/ruby.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,7 @@ jobs:
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
# Run the test suite through the rake `test` task.
- name: Run tests
run: bundle exec rake test
# Install the rbs gem; the next step invokes the `rbs` command.
- name: Install RBS
run: gem install rbs
# Validate the signatures, passing `sig` to rbs via -I as the signature directory.
- name: Validate RBS types
run: rbs -I sig validate
2 changes: 2 additions & 0 deletions sig/classifier.rbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Namespace declaration only: this signature file declares no members of its
# own. The sibling signature files reopen the module to declare Bayes, LSI,
# ContentNode and WordList.
module Classifier
end
26 changes: 26 additions & 0 deletions sig/classifier/bayes.rbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
module Classifier
# Signature for Classifier::Bayes. Only types are declared here; notes about
# behavior are hedged because the implementation is not part of this file.
class Bayes
# Nested table: Symbol key -> (Symbol key -> Integer).
# NOTE(review): presumably category -> word -> count; confirm against the implementation.
@categories: Hash[Symbol, Hash[Symbol, Integer]]
# A single Integer total.
# NOTE(review): presumably the number of words seen across all categories; confirm.
@total_words: Integer
# One Integer per Symbol key.
# NOTE(review): presumably the number of trained documents per category; confirm.
@category_counts: Hash[Symbol, Integer]
# One Integer per Symbol key.
# NOTE(review): presumably the number of words per category; confirm.
@category_word_count: Hash[Symbol, Integer]

# Accepts zero or more categories; each one only has to respond to `to_s`
# (the built-in `_ToS` interface).
def initialize: (*_ToS categories) -> void

# Takes a category (anything responding to `to_s`) and a String of text.
# Declared `void`: callers should not rely on the return value.
def train: (_ToS category, String text) -> void

# Same parameters as `train`; also declared `void`.
# NOTE(review): presumably reverses the effect of a previous `train`; confirm.
def untrain: (_ToS category, String text) -> void

# Maps a String of text to a Hash from String keys to Float values.
# NOTE(review): presumably category name -> score; confirm.
def classifications: (String text) -> Hash[String, Float]

# Maps a String of text to a single String.
# NOTE(review): presumably the best-scoring category name; confirm.
def classify: (String text) -> String

# Takes no arguments and returns an Array of Strings.
def categories: () -> Array[String]

# Takes a category (anything responding to `to_s`) and returns a
# Hash[Symbol, Integer], the same type as the inner values of @categories.
def add_category: (_ToS category) -> Hash[Symbol, Integer]

# `append_category` is a second name for `add_category` with the same signature.
alias append_category add_category

# Takes a category (anything responding to `to_s`); declared `void`.
def remove_category: (_ToS category) -> void
end
end
3 changes: 3 additions & 0 deletions sig/classifier/extensions/object.rbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Reopens Object to declare one extra method on it.
class Object
# Takes no arguments and returns a Symbol.
# NOTE(review): presumably normalizes the receiver into a category key; confirm against the implementation.
def prepare_category_name: () -> Symbol
end
25 changes: 25 additions & 0 deletions sig/classifier/extensions/string.rbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Reopens String to declare the extension methods and constant added to it.
# Only types are stated as fact; behavioral notes are hedged.
class String
# A Set of Strings.
# NOTE(review): presumably stop words ignored when counting; confirm.
CORPUS_SKIP_WORDS: Set[String]

# Takes no arguments and returns a String.
def without_punctuation: () -> String

# Takes no arguments and returns a Hash from Symbol keys to Integer values.
# NOTE(review): presumably word -> occurrence count; confirm.
def word_hash: () -> Hash[Symbol, Integer]

# Same signature as `word_hash`.
# NOTE(review): how the two differ is not visible from the signature; confirm.
def clean_word_hash: () -> Hash[Symbol, Integer]

# Both parameters are optional: an Integer count and a String separator.
# Returns a String.
def summary: (?Integer count, ?String separator) -> String

# Same signature as `summary`.
def paragraph_summary: (?Integer count, ?String separator) -> String

# Takes no arguments and returns an Array of Strings.
def split_sentences: () -> Array[String]

# Takes no arguments and returns an Array of Strings.
def split_paragraphs: () -> Array[String]

# The methods declared below this marker are private.
private

# Takes an Array of Strings and returns a Hash[Symbol, Integer].
def word_hash_for_words: (Array[String] words) -> Hash[Symbol, Integer]

# Takes an Array of Strings and returns a Hash[Symbol, Integer].
def word_hash_for_symbols: (Array[String] words) -> Hash[Symbol, Integer]

# All three parameters are required here, unlike the optional count and
# separator on `summary` and `paragraph_summary`. Returns a String.
def perform_lsi: (Array[String] chunks, Integer count, String separator) -> String
end
23 changes: 23 additions & 0 deletions sig/classifier/extensions/vector.rbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Reopens the generic Array class (type parameter declared as
# `unchecked out Elem`) to declare one extra method.
class Array[unchecked out Elem]
# Optional Float `identity` argument and optional block; the block receives
# one element and returns a Numeric. The method returns a Float.
# NOTE(review): presumably `identity` is the starting value of the sum; confirm.
def sum_with_identity: (?Float identity) ?{ (Elem) -> Numeric } -> Float
end

# Mixin declaring two argument-less methods; the Vector signature in this
# file includes it.
module VectorExtensions
# Returns a Float.
def magnitude: () -> Float

# Returns a Vector whose element type is declared as Rational.
# NOTE(review): confirm the element type against the implementation.
def normalize: () -> Vector[Rational]
end

# Generic Vector class with a covariant (`out`) element type parameter.
# NOTE(review): whether this reopens an existing Vector signature or is the
# only declaration depends on which signatures are loaded; confirm.
class Vector[out Elem]
# Mixes in `magnitude` and `normalize`.
include VectorExtensions
end

# Generic Matrix class with a covariant (`out`) element type parameter.
# Declares one singleton method and three instance methods.
class Matrix[out Elem]
# Singleton method: takes an Array of Numerics and returns a Matrix[Numeric].
def self.diag: (Array[Numeric] diagonal_elements) -> Matrix[Numeric]

# Takes no arguments and returns a Matrix with the same element type.
def trans: () -> Matrix[Elem]

# Optional Integer `max_sweeps`; returns a three-element tuple of two
# Matrix[Numeric] values followed by an Array[Float].
# NOTE(review): presumably the factors and singular values of an SVD; confirm order and meaning.
def SV_decomp: (?Integer max_sweeps) -> [Matrix[Numeric], Matrix[Numeric], Array[Float]]

# Element assignment by Integer row and column index; takes and returns a Numeric.
def []=: (Integer row_index, Integer col_index, Numeric value) -> Numeric
end
57 changes: 57 additions & 0 deletions sig/classifier/lsi.rbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
module Classifier
# Signature for Classifier::LSI. Items and categories are typed `untyped`
# throughout, so this signature places no constraint on what callers store.
# Behavioral notes are hedged because the implementation is not part of this file.
class LSI
# Same type as the `auto_rebuild` accessor declared below.
@auto_rebuild: bool
# Same type as the `word_list` reader declared below.
@word_list: WordList
# Table from an item of any type to its ContentNode.
@items: Hash[untyped, ContentNode]
# Integer counter.
# NOTE(review): presumably bumped when the item set changes; confirm.
@version: Integer
# Integer counter.
# NOTE(review): presumably the value of @version when the index was last built; confirm.
@built_at_version: Integer

# Read-only access to the WordList.
attr_reader word_list: WordList
# Readable and writable boolean flag.
attr_accessor auto_rebuild: bool

# Class-level boolean getter and setter.
# NOTE(review): presumably reports whether the optional GSL backend is in use; confirm.
def self.gsl_available: () -> bool
def self.gsl_available=: (bool value) -> bool

# Takes an optional Hash of Symbol-keyed options with values of any type.
def initialize: (?Hash[Symbol, untyped] options) -> void

# Predicate taking no arguments and returning a bool.
def needs_rebuild?: () -> bool

# Takes an item, zero or more categories, and an optional block that
# receives one value and returns a String. Declared `void`.
# NOTE(review): presumably the block extracts the text to index from the item; confirm.
def add_item: (untyped item, *untyped categories) ?{ (untyped) -> String } -> void

# Takes a single item; declared `void`.
def <<: (untyped item) -> void

# Takes an item and returns an Array of values of any type.
def categories_for: (untyped item) -> Array[untyped]

# Takes an item; declared `void`.
def remove_item: (untyped item) -> void

# Takes no arguments and returns an Array of values of any type.
def items: () -> Array[untyped]

# Takes an optional Float `cutoff`; declared `void`.
def build_index: (?Float cutoff) -> void

# Takes an optional Integer `max_chunks` and returns an Array.
def highest_relative_content: (?Integer max_chunks) -> Array[untyped]

# Takes a document and an optional String-returning block; returns an Array
# of two-element tuples pairing a value of any type with a Float.
def proximity_array_for_content: (untyped doc) ?{ (untyped) -> String } -> Array[[untyped, Float]]

# Same signature as `proximity_array_for_content`.
def proximity_norms_for_content: (untyped doc) ?{ (untyped) -> String } -> Array[[untyped, Float]]

# Takes a String query and an optional Integer `max_nearest`; returns an Array.
def search: (String string, ?Integer max_nearest) -> Array[untyped]

# Takes a document, an optional Integer `max_nearest` and an optional
# String-returning block; returns an Array.
def find_related: (untyped doc, ?Integer max_nearest) ?{ (untyped) -> String } -> Array[untyped]

# Takes a document, an optional Float `cutoff` and an optional
# String-returning block; the return type is unconstrained.
def classify: (untyped doc, ?Float cutoff) ?{ (untyped) -> String } -> untyped

# Same parameters as `classify`; returns a Hash from keys of any type to Floats.
# NOTE(review): presumably category -> accumulated score; confirm.
def vote: (untyped doc, ?Float cutoff) ?{ (untyped) -> String } -> Hash[untyped, Float]

# Same parameters as `classify`; returns a two-element tuple whose second
# element is a Float or nil.
def classify_with_confidence: (untyped doc, ?Float cutoff) ?{ (untyped) -> String } -> [untyped, Float?]

# Takes a document and an optional Integer `count`; returns an Array of Symbols.
def highest_ranked_stems: (untyped doc, ?Integer count) -> Array[Symbol]

# The methods declared below this marker are private.
private

# Takes a matrix of unconstrained type and an optional Float `cutoff`; the
# return type is unconstrained.
def build_reduced_matrix: (untyped matrix, ?Float cutoff) -> untyped

# Takes an item and an optional String-returning block; returns a ContentNode.
def node_for_content: (untyped item) ?{ (untyped) -> String } -> ContentNode

# Takes no arguments; declared `void`.
def make_word_list: () -> void
end
end
22 changes: 22 additions & 0 deletions sig/classifier/lsi/content_node.rbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
module Classifier
# Signature for Classifier::ContentNode. The vector and norm attributes are
# all `untyped`, so this signature does not constrain their representation.
class ContentNode
# Same type as the `word_hash` reader declared below.
@word_hash: Hash[Symbol, Integer]
# Same type as the `categories` accessor declared below.
@categories: Array[untyped]

# Readable and writable attributes of unconstrained type.
# NOTE(review): presumably the raw and LSI-reduced vectors with their norms; confirm.
attr_accessor raw_vector: untyped
attr_accessor raw_norm: untyped
attr_accessor lsi_vector: untyped
attr_accessor lsi_norm: untyped
# Readable and writable Array of values of any type.
attr_accessor categories: Array[untyped]

# Read-only Hash from Symbol keys to Integer values.
attr_reader word_hash: Hash[Symbol, Integer]

# Takes a Hash[Symbol, Integer] of word frequencies followed by zero or
# more categories of any type.
def initialize: (Hash[Symbol, Integer] word_frequencies, *untyped categories) -> void

# Takes no arguments; the return type is unconstrained.
def search_vector: () -> untyped

# Takes no arguments; the return type is unconstrained.
def search_norm: () -> untyped

# Takes a WordList; the return type is unconstrained.
def raw_vector_with: (WordList word_list) -> untyped
end
end
15 changes: 15 additions & 0 deletions sig/classifier/lsi/word_list.rbs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
module Classifier
# Signature for Classifier::WordList.
class WordList
# Table from Symbol keys to Integer values.
# NOTE(review): presumably word -> index position; confirm against the implementation.
@location_table: Hash[Symbol, Integer]

# Takes no arguments.
def initialize: () -> void

# Takes a Symbol and returns an Integer or nil.
# NOTE(review): the condition under which nil is returned is not visible here; confirm.
def add_word: (Symbol word) -> Integer?

# Takes a Symbol and returns an Integer or nil.
def []: (Symbol lookup) -> Integer?

# Takes an Integer and returns a Symbol or nil.
def word_for_index: (Integer ind) -> Symbol?

# Takes no arguments and returns an Integer.
def size: () -> Integer
end
end