-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathultimate_benchmark.py
More file actions
172 lines (140 loc) · 5.27 KB
/
ultimate_benchmark.py
File metadata and controls
172 lines (140 loc) · 5.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
"""
ULTIMATE BENCHMARK - No Compromises Edition
Tests the absolute maximum speed we can achieve.
"""
import time
from pathlib import Path
import json
from datetime import datetime
from app.rag_naive import NaiveRAG
from app.no_compromise_rag import NoCompromiseHyperRAG
class UltimateBenchmark:
    """Head-to-head latency benchmark: NaiveRAG baseline vs NoCompromiseHyperRAG.

    Runs a fixed query set through both systems, prints per-query and
    aggregate timings, persists the results as JSON under
    ``ultimate_benchmarks/``, and returns the results dict.
    """

    def __init__(self):
        # Fixed query set so successive benchmark runs are comparable.
        self.test_queries = [
            "What is machine learning?",
            "Explain artificial intelligence",
            "How do neural networks work?",
            "What is deep learning?",
            "Describe natural language processing"
        ]

    def _time_queries(self, rag):
        """Run every test query through *rag*; return per-query latencies in ms.

        *rag* must expose ``query(text) -> (answer, chunks)``. Prints one
        progress line per query.
        """
        times = []
        for query in self.test_queries:
            print(f"Query: {query[:30]}...")
            start = time.perf_counter()
            answer, chunks = rag.query(query)
            latency = (time.perf_counter() - start) * 1000
            times.append(latency)
            print(f" Time: {latency:.1f}ms, Chunks: {chunks}")
        return times

    @staticmethod
    def _summarize(times):
        """Return avg/min/max latency stats (ms) plus the raw samples."""
        return {
            "avg_ms": sum(times) / len(times),
            "min_ms": min(times),
            "max_ms": max(times),
            "all_times": times
        }

    def run(self):
        """Execute the full comparison, print a report, save JSON, return results.

        Returns:
            dict with keys ``"naive"``, ``"no_compromise"`` (latency stats)
            and ``"improvement"`` (percent, speedup factor, target flag).
        """
        print("\n" + "=" * 80)
        print("⚡ ULTIMATE SPEED BENCHMARK - NO COMPROMISES")
        print("=" * 80)
        print("Testing ONLY: Naive RAG vs NO-COMPROMISE Hyper RAG")
        print("\nExpected: 2-3x speedup with caching + optimized generation")
        results = {}

        # --- Baseline: Naive RAG -------------------------------------------
        print("\n" + "=" * 80)
        print("📊 Testing NAIVE RAG (Baseline)...")
        print("=" * 80)
        naive = NaiveRAG()
        naive.initialize()
        try:
            naive_times = self._time_queries(naive)
        finally:
            # Release the baseline's resources even if a query raises.
            naive.close()
        results["naive"] = self._summarize(naive_times)

        # --- Candidate: No-Compromise Hyper RAG ----------------------------
        print("\n" + "=" * 80)
        print("⚡ Testing NO-COMPROMISE HYPER RAG...")
        print("=" * 80)
        print("Strategy: Caching + Simple FAISS + Fast Generation")
        hyper = NoCompromiseHyperRAG()
        hyper.initialize()
        try:
            hyper_times = self._time_queries(hyper)
        finally:
            # Original never closed hyper (leak). Its interface isn't visible
            # here, so mirror NaiveRAG's cleanup only when it exists.
            close = getattr(hyper, "close", None)
            if close is not None:
                close()
        results["no_compromise"] = self._summarize(hyper_times)

        # --- Improvement metrics -------------------------------------------
        naive_avg = results["naive"]["avg_ms"]
        hyper_avg = results["no_compromise"]["avg_ms"]
        # Guard both divisions: averages should be > 0, but a degenerate run
        # must not crash the report.
        improvement = ((naive_avg - hyper_avg) / naive_avg) * 100 if naive_avg > 0 else 0
        speedup = naive_avg / hyper_avg if hyper_avg > 0 else 0
        results["improvement"] = {
            "percent": improvement,
            "speedup_factor": speedup,
            "target_achieved": speedup >= 2.0
        }

        # --- Report --------------------------------------------------------
        print("\n" + "=" * 80)
        print("🎯 ULTIMATE RESULTS")
        print("=" * 80)
        print(f"\nNaive RAG Average: {naive_avg:.1f}ms")
        print(f"No-Compromise Average: {hyper_avg:.1f}ms")
        print(f"\nImprovement: {improvement:.1f}% faster")
        print(f"Speedup Factor: {speedup:.1f}x")
        if speedup >= 2.0:
            print("\n✅ SUCCESS: 2x+ SPEEDUP ACHIEVED!")
            print(" This is a REAL sales weapon.")
            print(" Project goal: REDEMPTION.")
        elif speedup >= 1.5:
            print("\n📈 GOOD: 1.5x speedup")
            print(" Solid foundation, needs tuning.")
        else:
            print("\n⚠️ NEEDS WORK: Below 1.5x")
            print(" Fundamental issues need investigation.")

        # --- Persist -------------------------------------------------------
        output_dir = Path("ultimate_benchmarks")
        output_dir.mkdir(exist_ok=True)
        filename = output_dir / f"ultimate_{int(time.time())}.json"
        with open(filename, 'w') as f:
            json.dump(results, f, indent=2)
        # BUG FIX: original printed a literal placeholder instead of the path.
        print(f"\n📁 Results saved to: {filename}")
        print("=" * 80)
        return results
if __name__ == "__main__":
    # Run the benchmark once and deliver a final verdict keyed on the
    # measured speedup factor.
    outcome = UltimateBenchmark().run()
    speedup = outcome["improvement"]["speedup_factor"]
    hit_target = speedup >= 2.0
    if hit_target:
        print("""
🎉 CONGRATULATIONS! PROJECT SUCCESSFUL!
You have built a working RAG optimization system that:
• Demonstrates 2x+ latency improvement on CPU
• Uses real optimizations (caching, fast generation)
• Provides measurable before/after comparison
• Is ready for production deployment
This IS a sales weapon.
This IS engineering excellence.
This IS what we promised.
""")
    else:
        print(f"""
🔧 ENGINEERING ANALYSIS NEEDED:
Current speedup: {speedup:.1f}x
Target: 2-10x
Possible issues:
1. Embedding model too slow
2. FAISS index issues
3. Database query overhead
4. Python GIL limitations
Next steps:
1. Profile embedding generation
2. Check FAISS search time
3. Optimize database queries
4. Consider async I/O
""")