Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 83 additions & 0 deletions test/bayes/bayesian_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -230,4 +230,87 @@ def test_untrain_decrements_category_word_count
new_word_count = @classifier.instance_variable_get(:@category_word_count)[:Interesting]
assert new_word_count < initial_word_count, 'Category word count should decrease'
end

# Edge case tests

# Training the Interesting category on '' must leave its word table empty.
def test_empty_string_training
  @classifier.train_interesting('')

  # Peeks at the internal @categories table for the :Interesting entry.
  interesting_words = @classifier.instance_variable_get(:@categories)[:Interesting]
  assert_empty interesting_words, 'Empty string should not add any words'
end

# Classifying '' after training both categories must still yield one of them.
def test_empty_string_classification
  @classifier.train_interesting('good words here')
  @classifier.train_uninteresting('bad words here')

  known_categories = %w[Interesting Uninteresting]
  assert_includes known_categories, @classifier.classify(''),
                  'Should return a category even for empty string'
end

# Trains the Interesting category on text mixing CJK/Hangul and ASCII words and
# checks that at least one word was stored for that category.
def test_unicode_text_training
  @classifier.train_interesting '日本語 chinese 中文 korean 한국어'
  @classifier.train_uninteresting 'plain english text only'

  # Unicode characters are treated as words if long enough
  # (NOTE(review): tokenisation rule not visible from this test - confirm).
  category_words = @classifier.instance_variable_get(:@categories)[:Interesting]

  # refute_empty prints the collection on failure, unlike a bare `assert size > 0`.
  refute_empty category_words, 'Should store unicode words'
end

# Emoji mixed into the training text must not stop the plain words from
# classifying as Interesting.
def test_emoji_training
  @classifier.train_interesting('😀 happy 🎉 celebration 🚀 rocket')
  @classifier.train_uninteresting('sad 😢 crying 💔 heartbreak')

  predicted = @classifier.classify('happy celebration')
  assert_equal 'Interesting', predicted, 'Should handle emoji in text'
end

# Training on punctuation-only input must leave the :Interesting entry a Hash.
# NOTE(review): the original author's note says such tokens are filtered by
# clean_word_hash; if so, assert_empty would be a stronger check - confirm.
def test_special_characters_only
  @classifier.train_interesting('!@#$%^&*()')

  interesting_words = @classifier.instance_variable_get(:@categories)[:Interesting]
  assert_kind_of Hash, interesting_words
end

# Trains on a 10,000-word repetition of 'interesting' and checks that the
# classifier both counts words and still classifies the repeated word correctly.
def test_very_long_text
  long_text = 'interesting ' * 10_000
  @classifier.train_interesting long_text
  @classifier.train_uninteresting 'boring text'

  # Reads the internal @total_words counter directly.
  total_words = @classifier.instance_variable_get(:@total_words)

  # assert_operator reports the actual counter value on failure, which a bare
  # `assert total_words > 0` would hide.
  assert_operator total_words, :>, 0, 'Should handle very long text'

  result = @classifier.classify('interesting')
  assert_equal 'Interesting', result
end

# With one word trained per category, each word must classify back to the
# category it was trained under.
def test_single_word_classification
  @classifier.train_interesting('apple')
  @classifier.train_uninteresting('banana')

  expectations = { 'apple' => 'Interesting', 'banana' => 'Uninteresting' }
  expectations.each do |word, category|
    assert_equal category, @classifier.classify(word)
  end
end

# Training on spaces, a tab and a newline must not register any words.
def test_whitespace_only
  @classifier.train_interesting(" \t\n ")

  interesting_words = @classifier.instance_variable_get(:@categories)[:Interesting]
  assert_empty interesting_words, 'Whitespace-only should not add words'
end

# Trains on upper-, lower- and mixed-case words, then classifies an
# all-lowercase query.
def test_mixed_case_classification
  @classifier.train_interesting('UPPERCASE lowercase MiXeD')
  @classifier.train_uninteresting('different words here')

  # The query is lowercase while the training text is not, so a match relies on
  # case-insensitive handling (presumably words are downcased during training -
  # not visible here, confirm against the classifier).
  predicted = @classifier.classify('uppercase lowercase')
  assert_equal 'Interesting', predicted, 'Should handle mixed case'
end

# A token containing digits that was trained as Interesting must classify as
# Interesting.
def test_numbers_in_text
  @classifier.train_interesting('test123 456test 789')
  @classifier.train_uninteresting('abc def ghi')

  predicted = @classifier.classify('test123')
  assert_equal 'Interesting', predicted, 'Should handle numbers in text'
end
end
96 changes: 96 additions & 0 deletions test/extensions/word_hash_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,100 @@ def test_summing_a_nil_array
# Summing an empty array with no explicit identity must yield 0.
def test_summing_an_empty_array
  # Minitest's assert_equal takes (expected, actual); the expected literal goes
  # first so a failure message labels the two values correctly.
  assert_equal 0, [].sum_with_identity
end

# When a block is given, sum_with_identity must sum the block's results:
# 1*2 + 2*2 + 3*2 = 12.
def test_sum_with_block
  doubled_sum = [1, 2, 3].sum_with_identity { |x| x * 2 }

  # Expected value first, actual second, per assert_equal(expected, actual).
  assert_equal 12.0, doubled_sum
end

# With an explicit identity argument, an empty array must sum to that identity.
def test_sum_with_custom_identity
  # Expected value first, actual second, per assert_equal(expected, actual).
  assert_equal 100.0, [].sum_with_identity(100)
end
end

# Checks String#without_punctuation against a handful of fixed inputs.
class StringPunctuationTest < Minitest::Test
  # Comma and exclamation mark in an ordinary sentence.
  def test_without_punctuation_basic
    assert_equal 'Hello world ', 'Hello, world!'.without_punctuation
  end

  # Parentheses, apostrophe, braces, angle brackets, dots and a question mark.
  def test_without_punctuation_many_symbols
    cleaned = "Hello (greeting's), with {braces} < >...?".without_punctuation
    assert_equal 'Hello greetings with braces ', cleaned
  end

  # Empty input stays empty.
  def test_without_punctuation_empty_string
    assert_equal '', ''.without_punctuation
  end

  # Text without punctuation comes back unchanged.
  def test_without_punctuation_no_punctuation
    assert_equal 'plain text here', 'plain text here'.without_punctuation
  end

  # Input made only of punctuation is expected to reduce to a single space.
  def test_without_punctuation_only_punctuation
    assert_equal ' ', '!@#$%^&*()'.without_punctuation
  end
end

# Exercises Vector#magnitude and Vector#normalize on small fixed vectors.
#
# Every numeric check uses assert_in_delta: the results are floats, and
# Minitest recommends assert_in_delta over assert_equal for float comparison.
class VectorExtensionsTest < Minitest::Test
  # A 3-4-5 triangle gives magnitude 5.
  def test_magnitude_basic
    vec = Vector[3, 4]
    assert_in_delta 5.0, vec.magnitude, 0.001
  end

  # The zero vector has magnitude 0.
  def test_magnitude_zero_vector
    vec = Vector[0, 0, 0]
    assert_in_delta 0.0, vec.magnitude, 0.001
  end

  # A one-element vector's magnitude is that element's absolute value.
  def test_magnitude_single_element
    vec = Vector[5]
    assert_in_delta 5.0, vec.magnitude, 0.001
  end

  # Component signs do not affect magnitude.
  def test_magnitude_negative_values
    vec = Vector[-3, -4]
    assert_in_delta 5.0, vec.magnitude, 0.001
  end

  # A normalized vector has magnitude 1.
  def test_normalize_basic
    vec = Vector[3, 4]
    normalized = vec.normalize
    assert_in_delta 1.0, normalized.magnitude, 0.001
  end

  # Normalizing a vector that is already unit length leaves the first two
  # components at 1 and 0.
  def test_normalize_unit_vector
    vec = Vector[1, 0, 0]
    normalized = vec.normalize
    assert_in_delta 1.0, normalized[0], 0.001
    assert_in_delta 0.0, normalized[1], 0.001
  end

  # Scaling to unit length keeps the vector pointing along the same axis.
  def test_normalize_preserves_direction
    vec = Vector[2, 0]
    normalized = vec.normalize
    assert_in_delta 1.0, normalized[0], 0.001
    assert_in_delta 0.0, normalized[1], 0.001
  end
end

# Covers Matrix.diag, Matrix#trans and element assignment on a Matrix.
class MatrixExtensionsTest < Minitest::Test
  # Matrix.diag on an array must equal Matrix.diagonal on the same values.
  def test_diag_creates_diagonal_matrix
    from_diag = Matrix.diag([1, 2, 3])
    assert_equal Matrix.diagonal(1, 2, 3), from_diag
  end

  # #trans must return the same matrix as #transpose.
  def test_trans_alias
    grid = Matrix[[1, 2], [3, 4]]
    assert_equal grid.transpose, grid.trans
  end

  # Writing through []= must be visible when reading the same cell back.
  def test_matrix_element_assignment
    grid = Matrix[[1, 2], [3, 4]]
    grid[0, 1] = 99
    assert_equal 99, grid[0, 1]
  end
end
82 changes: 82 additions & 0 deletions test/lsi/lsi_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -152,4 +152,86 @@ def test_summary
assert_equal 'This text involves dogs too [...] This text also involves cats',
[@str1, @str2, @str3, @str4, @str5].join.summary(2)
end

# Edge case tests

# A freshly constructed LSI with no items must report that no rebuild is needed.
def test_empty_index_needs_rebuild
  empty_index = Classifier::LSI.new
  refute empty_index.needs_rebuild?, 'Empty index should not need rebuild'
end

# With auto_rebuild off, adding a single item must not flag the index as
# needing a rebuild.
def test_single_item_needs_rebuild
  index = Classifier::LSI.new(auto_rebuild: false)
  index.add_item('Single document', 'Category')

  refute index.needs_rebuild?, 'Single item index should not need rebuild'
end

# Removing one of two indexed items must shrink #items by one and drop that item.
def test_remove_item
  index = Classifier::LSI.new
  [@str1, @str2].each { |doc| index.add_item(doc, 'Dog') }
  assert_equal 2, index.items.size

  index.remove_item(@str1)

  assert_equal 1, index.items.size
  refute_includes index.items, @str1
end

# Removing an item that was never added must leave the existing item in place.
def test_remove_nonexistent_item
  index = Classifier::LSI.new
  index.add_item(@str1, 'Dog')

  index.remove_item('nonexistent')

  remaining = index.items.size
  assert_equal 1, remaining, 'Should not affect index when removing nonexistent item'
end

# #items must list every document that was added, regardless of category.
def test_items_method
  index = Classifier::LSI.new
  index.add_item(@str1, 'Dog')
  index.add_item(@str2, 'Cat')

  indexed = index.items
  assert_equal 2, indexed.size
  [@str1, @str2].each { |doc| assert_includes indexed, doc }
end

# find_related for an indexed document must not return that document itself.
def test_find_related_excludes_self
  index = Classifier::LSI.new
  { @str1 => 'Dog', @str2 => 'Dog', @str3 => 'Cat' }.each do |doc, category|
    index.add_item(doc, category)
  end

  related = index.find_related(@str1, 3)
  refute_includes related, @str1, 'Should not include the source document in related results'
end

# Indexes two 'English' documents and one 'French' document, then expects an
# English-looking query to classify as 'English'.
# NOTE(review): despite the test name, none of the fixture strings contain
# non-ASCII characters - confirm whether unicode input was intended here.
def test_unicode_mixed_with_ascii
  index = Classifier::LSI.new
  index.add_item('English words and text here', 'English')
  index.add_item('More english content available', 'English')
  index.add_item('French words bonjour merci', 'French')

  predicted = index.classify('english content')
  assert_equal 'English', predicted
end

# With auto_rebuild on, the index must not report a pending rebuild after
# items are added.
def test_needs_rebuild_with_auto_rebuild_true
  index = Classifier::LSI.new(auto_rebuild: true)
  [@str1, @str2].each { |doc| index.add_item(doc, 'Dog') }

  refute index.needs_rebuild?, 'Auto-rebuild should keep index current'
end

# categories_for an item that was never indexed must come back empty.
def test_categories_for_nonexistent_item
  index = Classifier::LSI.new
  index.add_item(@str1, 'Dog')

  categories = index.categories_for('nonexistent')
  assert_empty categories, 'Should return empty array for nonexistent item'
end
end