Merge branch 'development'

atupem · atupem · commit b0ee1e66e538 · 2025-08-13T16:08:35.000-04:00
diff --git a/packages/bytebot-agent/src/agent/agent.computer-use.ts b/packages/bytebot-agent/src/agent/agent.computer-use.ts
@@ -303,7 +303,7 @@ async function clickMouse(input: {
         action: 'click_mouse',
         coordinates,
         button,
-        holdKeys,
+        holdKeys: holdKeys && holdKeys.length > 0 ? holdKeys : undefined,
         clickCount,
       }),
     });
@@ -358,7 +358,7 @@ async function dragMouse(input: {
         action: 'drag_mouse',
         path,
         button,
-        holdKeys,
+        holdKeys: holdKeys && holdKeys.length > 0 ? holdKeys : undefined,
       }),
     });
   } catch (error) {
@@ -387,7 +387,7 @@ async function scroll(input: {
         coordinates,
         direction,
         scrollCount,
-        holdKeys,
+        holdKeys: holdKeys && holdKeys.length > 0 ? holdKeys : undefined,
       }),
     });
   } catch (error) {
diff --git a/packages/bytebot-agent/src/agent/agent.constants.ts b/packages/bytebot-agent/src/agent/agent.constants.ts
@@ -50,9 +50,9 @@ CORE WORKING PRINCIPLES
    Use **exactly** the identifiers listed in **VALID KEYS** below when supplying \`keys\` to \`computer_type_keys\` or \`computer_press_keys\`. All identifiers come from nut-tree's \`Key\` enum; they are case-sensitive and contain *no spaces*.
 5. **Verify Every Step** - After each action:  
    a. Take another screenshot.  
-   b. Confirm the expected state before continuing. If it failed, retry sensibly or abort with \`"status":"failed"\`.
+   b. Confirm the expected state before continuing. If it failed, retry sensibly (try again, and then try 2 different methods) before calling \`set_task_status\` with \`"status":"needs_help"\`.
 6. **Efficiency & Clarity** - Combine related key presses; prefer scrolling or dragging over many small moves; minimise unnecessary waits.
-7. **Stay Within Scope** - Do nothing the user didn't request; don't suggest unrelated tasks.
+7. **Stay Within Scope** - Do nothing the user didn't request; don't suggest unrelated tasks. For form and login fields, don't fill in random data, unless explicitly told to do so.
 8. **Security** - If you see a password, secret key, or other sensitive information (or the user shares it with you), do not repeat it in conversation. When typing sensitive information, use \`computer_type_text\` with \`isSensitive\` set to \`true\`.
 9. **Consistency & Persistence** - Even if the task is repetitive, do not end the task until the user's goal is completely met. For bulk operations, maintain focus and continue until all items are processed.
 
@@ -124,23 +124,18 @@ TASK LIFECYCLE TEMPLATE
    { "name": "computer_read_file", "input": { "path": "/path/to/file" } }
    \`\`\`
    This tool reads files and returns them as document content blocks with base64 data, supporting various file types including documents (PDF, DOCX, TXT, etc.) and images (PNG, JPG, etc.).
-   
-8. **Ask for Help** - If you need clarification, invoke          
+8. **Ask for Help** - If you need clarification, or if you are unable to fully complete the task, invoke          
    \`\`\`json
-   { "name": "set_task_status", "input": { "status": "needs_help", "description": "Summary of help needed" } }
+   { "name": "set_task_status", "input": { "status": "needs_help", "description": "Summary of help or clarification needed" } }
    \`\`\`  
 9. **Cleanup** - When the user's goal is met:  
    • Close every window, file, or app you opened so the desktop is tidy.  
    • Return to an idle desktop/background.  
-10. **Terminate** - ONLY ONCE THE USER'S GOAL IS MET, As your final tool call and message, invoke          
+10. **Terminate** - ONLY ONCE THE USER'S GOAL IS COMPLETELY MET, As your final tool call and message, invoke          
    \`\`\`json
    { "name": "set_task_status", "input": { "status": "completed", "description": "Summary of the task" } }
    \`\`\`  
-   Or, if the task is failed or unrecoverable, invoke          
-   \`\`\`json
-   { "name": "set_task_status", "input": { "status": "failed", "description": "Summary of the failure" } }
-   \`\`\`  
-   No further actions or messages follow this call.
+   No further actions or messages will follow this call.
 
 **IMPORTANT**: For bulk operations like "visit each profile in the directory":
 - Do NOT mark as completed after just a few profiles
diff --git a/packages/bytebot-agent/src/agent/agent.processor.ts b/packages/bytebot-agent/src/agent/agent.processor.ts
@@ -355,25 +355,6 @@ export class AgentProcessor {
               },
             ],
           });
-
-          switch (block.input.status) {
-            case 'completed':
-              await this.tasksService.update(taskId, {
-                status: TaskStatus.COMPLETED,
-                completedAt: new Date(),
-              });
-              break;
-            case 'failed':
-              await this.tasksService.update(taskId, {
-                status: TaskStatus.FAILED,
-              });
-              break;
-            case 'needs_help':
-              await this.tasksService.update(taskId, {
-                status: TaskStatus.NEEDS_HELP,
-              });
-              break;
-          }
         }
       }
 
@@ -394,11 +375,6 @@ export class AgentProcessor {
               completedAt: new Date(),
             });
             break;
-          case 'failed':
-            await this.tasksService.update(taskId, {
-              status: TaskStatus.FAILED,
-            });
-            break;
           case 'needs_help':
             await this.tasksService.update(taskId, {
               status: TaskStatus.NEEDS_HELP,
diff --git a/packages/bytebot-agent/src/agent/agent.tools.ts b/packages/bytebot-agent/src/agent/agent.tools.ts
@@ -82,6 +82,7 @@ export const _clickMouseTool = {
       clickCount: {
         type: 'integer' as const,
         description: 'Number of clicks to perform (e.g., 2 for double-click)',
+        default: 1,
       },
     },
     required: ['button', 'clickCount'],
@@ -318,13 +319,13 @@ export const _setTaskStatusTool = {
     properties: {
       status: {
         type: 'string' as const,
-        enum: ['completed', 'failed', 'needs_help'],
+        enum: ['completed', 'needs_help'],
         description: 'The status of the task',
       },
       description: {
         type: 'string' as const,
         description:
-          'If the task is completed, a summary of the task. If the task is failed, a description of the failure.',
+          'If the task is completed, a summary of the task. If the task needs help, a description of the issue or clarification needed.',
       },
     },
     required: ['status', 'description'],
diff --git a/packages/bytebot-agent/src/tasks/tasks.controller.ts b/packages/bytebot-agent/src/tasks/tasks.controller.ts
@@ -183,14 +183,6 @@ export class TasksController {
     return this.messagesService.findProcessedMessages(taskId, options);
   }
 
-  @Patch(':id')
-  async update(
-    @Param('id') id: string,
-    @Body() updateTaskDto: UpdateTaskDto,
-  ): Promise<Task> {
-    return this.tasksService.update(id, updateTaskDto);
-  }
-
   @Delete(':id')
   @HttpCode(HttpStatus.NO_CONTENT)
   async delete(@Param('id') id: string): Promise<void> {
diff --git a/packages/bytebot-agent/src/tasks/tasks.service.ts b/packages/bytebot-agent/src/tasks/tasks.service.ts
@@ -242,13 +242,15 @@ export class TasksService {
       throw new NotFoundException(`Task with ID ${id} not found`);
     }
 
-    const updatedTask = await this.prisma.task.update({
+    let updatedTask = await this.prisma.task.update({
       where: { id },
       data: updateTaskDto,
     });
 
     if (updateTaskDto.status === TaskStatus.COMPLETED) {
       this.eventEmitter.emit('task.completed', { taskId: id });
+    } else if (updateTaskDto.status === TaskStatus.NEEDS_HELP) {
+      updatedTask = await this.takeOver(id);
     } else if (updateTaskDto.status === TaskStatus.FAILED) {
       this.eventEmitter.emit('task.failed', { taskId: id });
     }
@@ -291,15 +293,6 @@ export class TasksService {
     });
 
     this.tasksGateway.emitNewMessage(taskId, message);
-
-    if (task.status === TaskStatus.NEEDS_HELP) {
-      await this.prisma.task.update({
-        where: { id: taskId },
-        data: {
-          status: TaskStatus.RUNNING,
-        },
-      });
-    }
     return task;
   }
 
@@ -319,6 +312,7 @@ export class TasksService {
       where: { id: taskId },
       data: {
         control: Role.ASSISTANT,
+        status: TaskStatus.RUNNING,
       },
     });
 
diff --git a/packages/bytebot-ui/src/app/tasks/[id]/page.tsx b/packages/bytebot-ui/src/app/tasks/[id]/page.tsx
@@ -39,25 +39,39 @@ export default function TaskPage() {
   } = useChatSession({ initialTaskId: taskId });
 
   // Determine if task is inactive (show screenshot) or active (show VNC)
-  const isTaskInactive =
-    taskStatus === TaskStatus.COMPLETED ||
-    taskStatus === TaskStatus.FAILED ||
-    taskStatus === TaskStatus.CANCELLED;
+  function isTaskInactive(): boolean {
+    return (
+      taskStatus === TaskStatus.COMPLETED ||
+      taskStatus === TaskStatus.FAILED ||
+      taskStatus === TaskStatus.CANCELLED
+    );
+  }
 
   // Determine if user can take control
-  const canTakeOver =
-    control === Role.ASSISTANT && taskStatus === TaskStatus.RUNNING;
+  function canTakeOver(): boolean {
+    return control === Role.ASSISTANT && taskStatus === TaskStatus.RUNNING;
+  }
 
   // Determine if user has control or is in takeover mode
-  const hasUserControl =
-    control === Role.USER && taskStatus === TaskStatus.RUNNING;
+  function hasUserControl(): boolean {
+    return (
+      control === Role.USER &&
+      (taskStatus === TaskStatus.RUNNING ||
+        taskStatus === TaskStatus.NEEDS_HELP)
+    );
+  }
 
   // Determine if task can be cancelled
-  const canCancel =
-    taskStatus === TaskStatus.RUNNING || taskStatus === TaskStatus.NEEDS_HELP;
+  function canCancel(): boolean {
+    return (
+      taskStatus === TaskStatus.RUNNING || taskStatus === TaskStatus.NEEDS_HELP
+    );
+  }
 
   // Determine VNC mode - interactive when user has control, view-only otherwise
-  const vncViewOnly = !hasUserControl;
+  function vncViewOnly(): boolean {
+    return !hasUserControl(); // must call it: a bare function reference is always truthy, which made this always false
+  }
 
   // Use scroll screenshot hook for inactive tasks
   const { currentScreenshot } = useScrollScreenshot({
@@ -90,8 +104,8 @@ export default function TaskPage() {
           {/* Main container */}
           <div className="col-span-4">
             <DesktopContainer
-              screenshot={isTaskInactive ? currentScreenshot : null}
-              viewOnly={vncViewOnly}
+              screenshot={isTaskInactive() ? currentScreenshot : null}
+              viewOnly={vncViewOnly()}
               status={
                 (() => {
                   if (
@@ -110,7 +124,7 @@ export default function TaskPage() {
                 })() as VirtualDesktopStatus
               }
             >
-              {canTakeOver && (
+              {canTakeOver() && (
                 <Button
                   onClick={handleTakeOverTask}
                   variant="default"
@@ -125,12 +139,12 @@ export default function TaskPage() {
                   Take Over
                 </Button>
               )}
-              {hasUserControl && (
+              {hasUserControl() && (
                 <Button onClick={handleResumeTask} variant="default" size="sm">
                   Proceed
                 </Button>
               )}
-              {canCancel && (
+              {canCancel() && (
                 <DropdownMenu>
                   <DropdownMenuTrigger asChild>
                     <Button variant="outline" size="icon">
@@ -158,7 +172,7 @@ export default function TaskPage() {
             {/* Messages scrollable area */}
             <div
               ref={chatContainerRef}
-              className="min-h-0 flex-1 overflow-scroll px-4 hide-scrollbar"
+              className="hide-scrollbar min-h-0 flex-1 overflow-scroll px-4"
             >
               <ChatContainer
                 scrollRef={chatContainerRef}