Skip to content

Commit e5d75fe

Browse files
authored
Merge pull request #2048 from trungutt/trungutt/improve-todo-completion-reliability
fix: improve todo completion reliability
2 parents bf73416 + 6416e3b commit e5d75fe

File tree

2 files changed

+171
-30
lines changed

2 files changed

+171
-30
lines changed

pkg/tools/builtin/todo.go

Lines changed: 62 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"encoding/json"
66
"fmt"
7+
"strings"
78
"sync"
89
"sync/atomic"
910

@@ -53,17 +54,28 @@ type UpdateTodosArgs struct {
5354

5455
// Output types for JSON-structured responses.
5556

57+
type CreateTodoOutput struct {
58+
Created Todo `json:"created" jsonschema:"The created todo item"`
59+
AllTodos []Todo `json:"all_todos" jsonschema:"Current state of all todo items"`
60+
Reminder string `json:"reminder,omitempty" jsonschema:"Reminder about incomplete todos that still need to be completed"`
61+
}
62+
5663
type CreateTodosOutput struct {
57-
Created []Todo `json:"created" jsonschema:"List of created todo items"`
64+
Created []Todo `json:"created" jsonschema:"List of created todo items"`
65+
AllTodos []Todo `json:"all_todos" jsonschema:"Current state of all todo items"`
66+
Reminder string `json:"reminder,omitempty" jsonschema:"Reminder about incomplete todos that still need to be completed"`
5867
}
5968

6069
type UpdateTodosOutput struct {
6170
Updated []TodoUpdate `json:"updated,omitempty" jsonschema:"List of successfully updated todos"`
6271
NotFound []string `json:"not_found,omitempty" jsonschema:"IDs of todos that were not found"`
72+
AllTodos []Todo `json:"all_todos" jsonschema:"Current state of all todo items"`
73+
Reminder string `json:"reminder,omitempty" jsonschema:"Reminder about incomplete todos that still need to be completed"`
6374
}
6475

6576
type ListTodosOutput struct {
66-
Todos []Todo `json:"todos" jsonschema:"List of all current todo items"`
77+
Todos []Todo `json:"todos" jsonschema:"List of all current todo items"`
78+
Reminder string `json:"reminder,omitempty" jsonschema:"Reminder about incomplete todos that still need to be completed"`
6779
}
6880

6981
// TodoStorage defines the storage layer for todo items.
@@ -157,17 +169,20 @@ func (t *TodoTool) Instructions() string {
157169
IMPORTANT: You MUST use these tools to track the progress of your tasks:
158170
159171
1. Before starting any complex task:
160-
- Create a todo for each major step using create_todo
172+
- Create a todo for each major step using create_todos (prefer batch creation)
161173
- Break down complex steps into smaller todos
162174
163175
2. While working:
176+
- Update todo status to "in-progress" BEFORE starting each task
177+
- Mark todos as "completed" IMMEDIATELY after finishing each task
164178
- Use list_todos frequently to keep track of remaining work
165-
- Mark todos as "completed" when finished
166179
167-
3. Task Management Rules:
168-
- Never start a new task without creating a todo for it
169-
- Always check list_todos before responding to ensure no steps are missed
170-
- Update todo status to reflect current progress
180+
3. Task Completion Rules:
181+
- EVERY todo you create MUST eventually be marked "completed"
182+
- Before sending your final response, call list_todos to verify ALL todos are completed
183+
- If any todos remain pending or in-progress, complete them or mark them completed before responding
184+
- Never leave todos in a pending or in-progress state when you are done working
185+
- When updating multiple todos, batch them in a single update_todos call
171186
172187
This toolset is REQUIRED for maintaining task state and ensuring all steps are completed.`
173188
}
@@ -196,15 +211,24 @@ func (h *todoHandler) jsonResult(v any) (*tools.ToolCallResult, error) {
196211
}
197212

198213
func (h *todoHandler) createTodo(_ context.Context, params CreateTodoArgs) (*tools.ToolCallResult, error) {
199-
return h.jsonResult(h.addTodo(params.Description))
214+
created := h.addTodo(params.Description)
215+
return h.jsonResult(CreateTodoOutput{
216+
Created: created,
217+
AllTodos: h.storage.All(),
218+
Reminder: h.incompleteReminder(),
219+
})
200220
}
201221

202222
func (h *todoHandler) createTodos(_ context.Context, params CreateTodosArgs) (*tools.ToolCallResult, error) {
203223
created := make([]Todo, 0, len(params.Descriptions))
204224
for _, desc := range params.Descriptions {
205225
created = append(created, h.addTodo(desc))
206226
}
207-
return h.jsonResult(CreateTodosOutput{Created: created})
227+
return h.jsonResult(CreateTodosOutput{
228+
Created: created,
229+
AllTodos: h.storage.All(),
230+
Reminder: h.incompleteReminder(),
231+
})
208232
}
209233

210234
func (h *todoHandler) updateTodos(_ context.Context, params UpdateTodosArgs) (*tools.ToolCallResult, error) {
@@ -233,32 +257,48 @@ func (h *todoHandler) updateTodos(_ context.Context, params UpdateTodosArgs) (*t
233257
return res, nil
234258
}
235259

236-
if h.allCompleted() {
237-
h.storage.Clear()
238-
}
260+
result.AllTodos = h.storage.All()
261+
result.Reminder = h.incompleteReminder()
239262

240263
return h.jsonResult(result)
241264
}
242265

243-
func (h *todoHandler) allCompleted() bool {
266+
// incompleteReminder returns a reminder string listing any non-completed todos,
267+
// or an empty string if all are completed (or storage is empty).
268+
func (h *todoHandler) incompleteReminder() string {
244269
all := h.storage.All()
245-
if len(all) == 0 {
246-
return false
247-
}
270+
var pending, inProgress []string
248271
for _, todo := range all {
249-
if todo.Status != "completed" {
250-
return false
272+
switch todo.Status {
273+
case "pending":
274+
pending = append(pending, fmt.Sprintf("[%s] %s", todo.ID, todo.Description))
275+
case "in-progress":
276+
inProgress = append(inProgress, fmt.Sprintf("[%s] %s", todo.ID, todo.Description))
251277
}
252278
}
253-
return true
279+
if len(pending) == 0 && len(inProgress) == 0 {
280+
return ""
281+
}
282+
283+
var b strings.Builder
284+
b.WriteString("The following todos are still incomplete and MUST be completed:")
285+
for _, s := range inProgress {
286+
b.WriteString(" (in-progress) " + s)
287+
}
288+
for _, s := range pending {
289+
b.WriteString(" (pending) " + s)
290+
}
291+
return b.String()
254292
}
255293

256294
func (h *todoHandler) listTodos(_ context.Context, _ tools.ToolCall) (*tools.ToolCallResult, error) {
257295
todos := h.storage.All()
258296
if todos == nil {
259297
todos = []Todo{}
260298
}
261-
return h.jsonResult(ListTodosOutput{Todos: todos})
299+
out := ListTodosOutput{Todos: todos}
300+
out.Reminder = h.incompleteReminder()
301+
return h.jsonResult(out)
262302
}
263303

264304
func (t *TodoTool) Tools(context.Context) ([]tools.Tool, error) {
@@ -268,7 +308,7 @@ func (t *TodoTool) Tools(context.Context) ([]tools.Tool, error) {
268308
Category: "todo",
269309
Description: "Create a new todo item with a description",
270310
Parameters: tools.MustSchemaFor[CreateTodoArgs](),
271-
OutputSchema: tools.MustSchemaFor[Todo](),
311+
OutputSchema: tools.MustSchemaFor[CreateTodoOutput](),
272312
Handler: tools.NewHandler(t.handler.createTodo),
273313
Annotations: tools.ToolAnnotations{
274314
Title: "Create TODO",

pkg/tools/builtin/todo_test.go

Lines changed: 109 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,16 @@ func TestTodoTool_CreateTodo(t *testing.T) {
3131
})
3232
require.NoError(t, err)
3333

34-
var output Todo
34+
var output CreateTodoOutput
3535
require.NoError(t, json.Unmarshal([]byte(result.Output), &output))
36-
assert.Equal(t, "todo_1", output.ID)
37-
assert.Equal(t, "Test todo item", output.Description)
38-
assert.Equal(t, "pending", output.Status)
36+
assert.Equal(t, "todo_1", output.Created.ID)
37+
assert.Equal(t, "Test todo item", output.Created.Description)
38+
assert.Equal(t, "pending", output.Created.Status)
39+
40+
// Full state is included in the response
41+
require.Len(t, output.AllTodos, 1)
42+
assert.Equal(t, "todo_1", output.AllTodos[0].ID)
43+
assert.Contains(t, output.Reminder, "todo_1")
3944

4045
require.Equal(t, 1, storage.Len())
4146
requireMeta(t, result, 1)
@@ -59,10 +64,16 @@ func TestTodoTool_CreateTodos(t *testing.T) {
5964
assert.Equal(t, "todo_2", output.Created[1].ID)
6065
assert.Equal(t, "todo_3", output.Created[2].ID)
6166

67+
// Full state included in response
68+
require.Len(t, output.AllTodos, 3)
69+
assert.Contains(t, output.Reminder, "todo_1")
70+
assert.Contains(t, output.Reminder, "todo_2")
71+
assert.Contains(t, output.Reminder, "todo_3")
72+
6273
assert.Equal(t, 3, storage.Len())
6374
requireMeta(t, result, 3)
6475

65-
// A second call continues the ID sequence
76+
// A second call continues the ID sequence and includes all 4 items
6677
result, err = tool.handler.createTodos(t.Context(), CreateTodosArgs{
6778
Descriptions: []string{"Last"},
6879
})
@@ -71,6 +82,7 @@ func TestTodoTool_CreateTodos(t *testing.T) {
7182
require.NoError(t, json.Unmarshal([]byte(result.Output), &output))
7283
require.Len(t, output.Created, 1)
7384
assert.Equal(t, "todo_4", output.Created[0].ID)
85+
require.Len(t, output.AllTodos, 4)
7486
assert.Equal(t, 4, storage.Len())
7587
requireMeta(t, result, 4)
7688
}
@@ -95,9 +107,28 @@ func TestTodoTool_ListTodos(t *testing.T) {
95107
assert.Equal(t, "pending", output.Todos[i].Status)
96108
}
97109

110+
// All pending, so reminder should list all of them
111+
assert.Contains(t, output.Reminder, "todo_1")
112+
assert.Contains(t, output.Reminder, "todo_2")
113+
assert.Contains(t, output.Reminder, "todo_3")
114+
98115
requireMeta(t, result, 3)
99116
}
100117

118+
func TestTodoTool_ListTodos_Empty(t *testing.T) {
119+
tool := NewTodoTool()
120+
121+
result, err := tool.handler.listTodos(t.Context(), tools.ToolCall{})
122+
require.NoError(t, err)
123+
124+
var output ListTodosOutput
125+
require.NoError(t, json.Unmarshal([]byte(result.Output), &output))
126+
assert.Empty(t, output.Todos)
127+
assert.Empty(t, output.Reminder)
128+
129+
requireMeta(t, result, 0)
130+
}
131+
101132
func TestTodoTool_UpdateTodos(t *testing.T) {
102133
storage := NewMemoryTodoStorage()
103134
tool := NewTodoTool(WithStorage(storage))
@@ -125,6 +156,17 @@ func TestTodoTool_UpdateTodos(t *testing.T) {
125156
assert.Equal(t, "in-progress", output.Updated[1].Status)
126157
assert.Empty(t, output.NotFound)
127158

159+
// Full state included in response
160+
require.Len(t, output.AllTodos, 3)
161+
assert.Equal(t, "completed", output.AllTodos[0].Status)
162+
assert.Equal(t, "pending", output.AllTodos[1].Status)
163+
assert.Equal(t, "in-progress", output.AllTodos[2].Status)
164+
165+
// Reminder should list incomplete todos
166+
assert.Contains(t, output.Reminder, "todo_2")
167+
assert.Contains(t, output.Reminder, "todo_3")
168+
assert.NotContains(t, output.Reminder, "todo_1") // completed, should not appear
169+
128170
todos := storage.All()
129171
require.Len(t, todos, 3)
130172
assert.Equal(t, "completed", todos[0].Status)
@@ -159,6 +201,9 @@ func TestTodoTool_UpdateTodos_PartialFailure(t *testing.T) {
159201
require.Len(t, output.NotFound, 1)
160202
assert.Equal(t, "nonexistent", output.NotFound[0])
161203

204+
// Reminder should mention the still-pending todo
205+
assert.Contains(t, output.Reminder, "todo_2")
206+
162207
todos := storage.All()
163208
require.Len(t, todos, 2)
164209
assert.Equal(t, "completed", todos[0].Status)
@@ -185,7 +230,7 @@ func TestTodoTool_UpdateTodos_AllNotFound(t *testing.T) {
185230
assert.Equal(t, "nonexistent2", output.NotFound[1])
186231
}
187232

188-
func TestTodoTool_UpdateTodos_ClearsWhenAllCompleted(t *testing.T) {
233+
func TestTodoTool_UpdateTodos_AllCompleted_NoAutoRemoval(t *testing.T) {
189234
storage := NewMemoryTodoStorage()
190235
tool := NewTodoTool(WithStorage(storage))
191236

@@ -205,9 +250,16 @@ func TestTodoTool_UpdateTodos_ClearsWhenAllCompleted(t *testing.T) {
205250
var output UpdateTodosOutput
206251
require.NoError(t, json.Unmarshal([]byte(result.Output), &output))
207252
require.Len(t, output.Updated, 2)
253+
assert.Empty(t, output.Reminder) // no reminder when all completed
208254

209-
assert.Empty(t, storage.All())
210-
requireMeta(t, result, 0)
255+
// Full state shows both items as completed
256+
require.Len(t, output.AllTodos, 2)
257+
assert.Equal(t, "completed", output.AllTodos[0].Status)
258+
assert.Equal(t, "completed", output.AllTodos[1].Status)
259+
260+
// Todos remain in storage (no auto-clear on completion)
261+
assert.Equal(t, 2, storage.Len())
262+
requireMeta(t, result, 2)
211263
}
212264

213265
func TestTodoTool_WithStorage(t *testing.T) {
@@ -254,6 +306,55 @@ func TestTodoTool_ParametersAreObjects(t *testing.T) {
254306
}
255307
}
256308

309+
func TestTodoTool_CreateTodo_FullStateOutput(t *testing.T) {
310+
tool := NewTodoTool()
311+
312+
// Create first todo
313+
result1, err := tool.handler.createTodo(t.Context(), CreateTodoArgs{Description: "First"})
314+
require.NoError(t, err)
315+
var out1 CreateTodoOutput
316+
require.NoError(t, json.Unmarshal([]byte(result1.Output), &out1))
317+
require.Len(t, out1.AllTodos, 1)
318+
assert.Contains(t, out1.Reminder, "todo_1")
319+
320+
// Create second todo — response shows both
321+
result2, err := tool.handler.createTodo(t.Context(), CreateTodoArgs{Description: "Second"})
322+
require.NoError(t, err)
323+
var out2 CreateTodoOutput
324+
require.NoError(t, json.Unmarshal([]byte(result2.Output), &out2))
325+
require.Len(t, out2.AllTodos, 2)
326+
assert.Contains(t, out2.Reminder, "todo_1")
327+
assert.Contains(t, out2.Reminder, "todo_2")
328+
}
329+
330+
func TestTodoTool_UpdateTodos_FullStateOutput(t *testing.T) {
331+
tool := NewTodoTool()
332+
333+
_, err := tool.handler.createTodos(t.Context(), CreateTodosArgs{
334+
Descriptions: []string{"A", "B", "C"},
335+
})
336+
require.NoError(t, err)
337+
338+
result, err := tool.handler.updateTodos(t.Context(), UpdateTodosArgs{
339+
Updates: []TodoUpdate{{ID: "todo_1", Status: "completed"}},
340+
})
341+
require.NoError(t, err)
342+
343+
var output UpdateTodosOutput
344+
require.NoError(t, json.Unmarshal([]byte(result.Output), &output))
345+
346+
// AllTodos shows full state including the completed item
347+
require.Len(t, output.AllTodos, 3)
348+
assert.Equal(t, "completed", output.AllTodos[0].Status)
349+
assert.Equal(t, "pending", output.AllTodos[1].Status)
350+
assert.Equal(t, "pending", output.AllTodos[2].Status)
351+
352+
// Reminder only lists incomplete items
353+
assert.NotContains(t, output.Reminder, "todo_1")
354+
assert.Contains(t, output.Reminder, "todo_2")
355+
assert.Contains(t, output.Reminder, "todo_3")
356+
}
357+
257358
// requireMeta asserts that result.Meta is a []Todo of the expected length.
258359
func requireMeta(t *testing.T, result *tools.ToolCallResult, expectedLen int) {
259360
t.Helper()

0 commit comments

Comments
 (0)