Initial commit

This commit is contained in:
daniel nakov
2025-02-25 00:08:45 -05:00
commit 8bf1bb8913
212 changed files with 25983 additions and 0 deletions

View File

@@ -0,0 +1,60 @@
import { default as React, useCallback } from 'react'
import { useNotifyAfterTimeout } from '../../hooks/useNotifyAfterTimeout.js'
import { AssistantMessage, BinaryFeedbackResult } from '../../query.js'
import type { Tool } from '../../Tool.js'
import type { NormalizedMessage } from '../../utils/messages.js'
import { BinaryFeedbackView } from './BinaryFeedbackView.js'
import {
type BinaryFeedbackChoose,
getBinaryFeedbackResultForChoice,
logBinaryFeedbackEvent,
} from './utils.js'
type Props = {
  // The two candidate assistant responses being compared.
  m1: AssistantMessage
  m2: AssistantMessage
  // Called exactly once per choice with the result derived from that choice.
  resolve: (result: BinaryFeedbackResult) => void
  // Everything below is forwarded untouched to BinaryFeedbackView.
  debug: boolean
  erroredToolUseIDs: Set<string>
  inProgressToolUseIDs: Set<string>
  normalizedMessages: NormalizedMessage[]
  tools: Tool[]
  unresolvedToolUseIDs: Set<string>
  verbose: boolean
}

/**
 * Container for the binary-feedback prompt.
 *
 * Renders `BinaryFeedbackView` for the two candidate messages and, when the
 * user picks an option, logs the feedback event and passes the result of
 * `getBinaryFeedbackResultForChoice` to `resolve`.
 */
export function BinaryFeedback({
  m1,
  m2,
  resolve,
  debug,
  erroredToolUseIDs,
  inProgressToolUseIDs,
  normalizedMessages,
  tools,
  unresolvedToolUseIDs,
  verbose,
}: Props): React.ReactNode {
  const onChoose = useCallback<BinaryFeedbackChoose>(
    choice => {
      // Logging is fire-and-forget: `logBinaryFeedbackEvent` is async, so a
      // rejection must be handled here or it surfaces as an unhandled promise
      // rejection. A telemetry failure must never block the user's choice.
      void logBinaryFeedbackEvent(m1, m2, choice).catch(() => {})
      resolve(getBinaryFeedbackResultForChoice(m1, m2, choice))
    },
    [m1, m2, resolve],
  )

  // NOTE(review): presumably notifies the user if the prompt is left
  // unanswered for a while — confirm against the hook's implementation.
  useNotifyAfterTimeout('Claude needs your input on a response comparison')

  return (
    <BinaryFeedbackView
      debug={debug}
      erroredToolUseIDs={erroredToolUseIDs}
      inProgressToolUseIDs={inProgressToolUseIDs}
      m1={m1}
      m2={m2}
      normalizedMessages={normalizedMessages}
      tools={tools}
      unresolvedToolUseIDs={unresolvedToolUseIDs}
      verbose={verbose}
      onChoose={onChoose}
    />
  )
}

View File

@@ -0,0 +1,111 @@
import { FileEditTool } from '../../tools/FileEditTool/FileEditTool.js'
import { FileEditToolDiff } from '../permissions/FileEditPermissionRequest/FileEditToolDiff.js'
import { Message } from '../Message.js'
import {
normalizeMessages,
type NormalizedMessage,
} from '../../utils/messages.js'
import type { Tool } from '../../Tool.js'
import { useTerminalSize } from '../../hooks/useTerminalSize.js'
import { FileWriteTool } from '../../tools/FileWriteTool/FileWriteTool.js'
import { FileWriteToolDiff } from '../permissions/FileWritePermissionRequest/FileWriteToolDiff.js'
import type { AssistantMessage } from '../../query.js'
import * as React from 'react'
import { Box } from 'ink'
type Props = {
  debug: boolean
  erroredToolUseIDs: Set<string>
  inProgressToolUseIDs: Set<string>
  // The candidate assistant response shown in this pane.
  message: AssistantMessage
  normalizedMessages: NormalizedMessage[]
  tools: Tool[]
  unresolvedToolUseIDs: Set<string>
  verbose: boolean
}

/**
 * Renders one candidate response of a binary-feedback comparison.
 *
 * The message is normalized, `progress` entries are dropped, and every
 * remaining entry is rendered as a `Message` followed by its
 * `AdditionalContext`.
 */
export function BinaryFeedbackOption({
  debug,
  erroredToolUseIDs,
  inProgressToolUseIDs,
  message,
  normalizedMessages,
  tools,
  unresolvedToolUseIDs,
  verbose,
}: Props): React.ReactNode {
  const { columns } = useTerminalSize()
  // NOTE(review): presumably half the terminal minus borders/padding — confirm.
  const messageWidth = columns / 2 - 6

  const renderable = normalizeMessages([message]).filter(
    part => part.type !== 'progress',
  )

  return renderable.map((part, position) => (
    <Box flexDirection="column" key={position}>
      <Message
        addMargin={false}
        erroredToolUseIDs={erroredToolUseIDs}
        debug={debug}
        inProgressToolUseIDs={inProgressToolUseIDs}
        message={part}
        messages={normalizedMessages}
        shouldAnimate={false}
        shouldShowDot={true}
        tools={tools}
        unresolvedToolUseIDs={unresolvedToolUseIDs}
        verbose={verbose}
        width={messageWidth}
      />
      <AdditionalContext message={part} verbose={verbose} />
    </Box>
  ))
}
/**
 * Renders a diff preview below an assistant message whose first content block
 * is a `FileEditTool` or `FileWriteTool` call. Renders nothing for any other
 * message, for an empty content array, or when the tool input fails schema
 * validation.
 */
function AdditionalContext({
  message,
  verbose,
}: {
  message: NormalizedMessage
  verbose: boolean
}) {
  // The hook must run unconditionally, i.e. before any early return.
  const { columns } = useTerminalSize()

  if (message.type !== 'assistant') {
    return null
  }

  // Guard at runtime instead of asserting with `!`: an empty content array
  // would otherwise throw on `content.type`.
  const content = message.message.content[0]
  if (!content || content.type !== 'tool_use') {
    return null
  }

  // NOTE(review): presumably half the terminal minus borders/padding — confirm.
  const diffWidth = columns / 2 - 12

  switch (content.name) {
    case FileEditTool.name: {
      const input = FileEditTool.inputSchema.safeParse(content.input)
      if (!input.success) {
        return null
      }
      return (
        <FileEditToolDiff
          file_path={input.data.file_path}
          new_string={input.data.new_string}
          old_string={input.data.old_string}
          verbose={verbose}
          width={diffWidth}
        />
      )
    }
    case FileWriteTool.name: {
      const input = FileWriteTool.inputSchema.safeParse(content.input)
      if (!input.success) {
        return null
      }
      return (
        <FileWriteToolDiff
          file_path={input.data.file_path}
          content={input.data.content}
          verbose={verbose}
          width={diffWidth}
        />
      )
    }
    default:
      return null
  }
}

View File

@@ -0,0 +1,171 @@
import { Option, SelectProps } from '@inkjs/ui'
import chalk from 'chalk'
import { Box, Text, useInput } from 'ink'
import Link from 'ink-link'
import React, { useState } from 'react'
import { getTheme } from '../../utils/theme.js'
import { Select } from '../CustomSelect/index.js'
import type { Tool } from '../../Tool.js'
import type { NormalizedMessage } from '../../utils/messages.js'
import { BinaryFeedbackOption } from './BinaryFeedbackOption.js'
import type { AssistantMessage } from '../../query.js'
import type { BinaryFeedbackChoose } from './utils.js'
import { useExitOnCtrlCD } from '../../hooks/useExitOnCtrlCD.js'
import { BinaryFeedbackChoice } from './utils.js'
// Target of the "[?]" help link shown in the prompt header.
const HELP_URL = 'https://go/cli-feedback'

// A select option whose value is restricted to the known feedback choices.
type BinaryFeedbackOption = Option & { value: BinaryFeedbackChoice }

/**
 * Builds the entries of the "How do you want to proceed?" select.
 *
 * This is a function rather than a module-level constant so that the theme is
 * not read during module initialization.
 */
export function getOptions(): BinaryFeedbackOption[] {
  const escHint = chalk.bold.hex(getTheme().warning)('esc')

  // This option combines the following user intents:
  // - The two options look about equally good to me
  // - I don't feel confident enough to choose
  // - I don't want to choose right now
  const chooseForMe: BinaryFeedbackOption = {
    label: 'Choose for me',
    value: 'no-preference',
  }
  const preferLeft: BinaryFeedbackOption = {
    label: 'Left option looks better',
    value: 'prefer-left',
  }
  const preferRight: BinaryFeedbackOption = {
    label: 'Right option looks better',
    value: 'prefer-right',
  }
  const neither: BinaryFeedbackOption = {
    label: `Neither, and tell Claude what to do differently (${escHint})`,
    value: 'neither',
  }

  return [chooseForMe, preferLeft, preferRight, neither]
}
type Props = {
  // Left (m1) and right (m2) candidate responses.
  m1: AssistantMessage
  m2: AssistantMessage
  // Invoked with the selected choice; optional.
  onChoose?: BinaryFeedbackChoose
  debug: boolean
  erroredToolUseIDs: Set<string>
  inProgressToolUseIDs: Set<string>
  normalizedMessages: NormalizedMessage[]
  tools: Tool[]
  unresolvedToolUseIDs: Set<string>
  verbose: boolean
}

/**
 * Presentational side-by-side comparison of two candidate responses with a
 * select underneath for picking one.
 *
 * Keyboard handling: left/right arrows move the select's focus to the matching
 * "prefer" option, and escape immediately chooses 'neither'. The pane whose
 * choice is currently focused is drawn with a bold border in the success color.
 */
export function BinaryFeedbackView({
  m1,
  m2,
  onChoose,
  debug,
  erroredToolUseIDs,
  inProgressToolUseIDs,
  normalizedMessages,
  tools,
  unresolvedToolUseIDs,
  verbose,
}: Props) {
  const theme = getTheme()
  // `focused` mirrors what the select reports via onFocus; `focusValue` is
  // what we push into the select in response to arrow keys.
  const [focused, setFocus] = useState('no-preference')
  const [focusValue, setFocusValue] = useState<string | undefined>(undefined)
  const exitState = useExitOnCtrlCD(() => process.exit(1))

  useInput((_input, key) => {
    if (key.leftArrow) {
      setFocusValue('prefer-left')
      return
    }
    if (key.rightArrow) {
      setFocusValue('prefer-right')
      return
    }
    if (key.escape) {
      onChoose?.('neither')
    }
  })

  // Renders one bordered pane; highlighted when its choice has focus.
  const renderCandidate = (
    candidate: AssistantMessage,
    choice: BinaryFeedbackChoice,
    outerMargin: { marginLeft?: number; marginRight?: number },
  ) => {
    const isHighlighted = focused === choice
    return (
      <Box
        flexDirection="column"
        flexGrow={1}
        flexBasis={1}
        gap={1}
        borderStyle={isHighlighted ? 'bold' : 'single'}
        borderColor={isHighlighted ? theme.success : theme.secondaryBorder}
        {...outerMargin}
        padding={1}
      >
        <BinaryFeedbackOption
          erroredToolUseIDs={erroredToolUseIDs}
          debug={debug}
          inProgressToolUseIDs={inProgressToolUseIDs}
          message={candidate}
          normalizedMessages={normalizedMessages}
          tools={tools}
          unresolvedToolUseIDs={unresolvedToolUseIDs}
          verbose={verbose}
        />
      </Box>
    )
  }

  const exitHint = exitState.pending ? (
    <Box marginLeft={3}>
      <Text dimColor>Press {exitState.keyName} again to exit</Text>
    </Box>
  ) : (
    // Render a blank line so that the UI doesn't reflow when the exit message is shown
    <Text> </Text>
  )

  return (
    <>
      <Box
        flexDirection="column"
        height="100%"
        width="100%"
        borderStyle="round"
        borderColor={theme.permission}
      >
        <Box width="100%" justifyContent="space-between" paddingX={1}>
          <Text bold color={theme.permission}>
            [ANT-ONLY] Help train Claude
          </Text>
          <Text>
            <Link url={HELP_URL}>[?]</Link>
          </Text>
        </Box>
        <Box flexDirection="row" width="100%" flexGrow={1} paddingTop={1}>
          {renderCandidate(m1, 'prefer-left', { marginRight: 1 })}
          {renderCandidate(m2, 'prefer-right', { marginLeft: 1 })}
        </Box>
        <Box flexDirection="column" paddingTop={1} paddingX={1}>
          <Text>How do you want to proceed?</Text>
          <Select
            options={getOptions()}
            onFocus={setFocus}
            focusValue={focusValue}
            onChange={onChoose as SelectProps['onChange']}
          />
        </Box>
      </Box>
      {exitHint}
    </>
  )
}

View File

@@ -0,0 +1,220 @@
import { TextBlock, ToolUseBlock } from '@anthropic-ai/sdk/resources/index.mjs'
import { AssistantMessage, BinaryFeedbackResult } from '../../query.js'
import { MAIN_QUERY_TEMPERATURE } from '../../services/claude.js'
import { getDynamicConfig, logEvent } from '../../services/statsig.js'
import { isEqual, zip } from 'lodash-es'
import { getGitState } from '../../utils/git.js'
/**
 * The outcomes a user can select when shown two candidate responses.
 *
 * As consumed by `getBinaryFeedbackResultForChoice` in this module:
 * - 'prefer-left' / 'prefer-right' select the first / second message.
 * - 'neither' yields no message.
 * - 'no-preference' picks one of the two messages at random.
 */
export type BinaryFeedbackChoice =
| 'prefer-left'
| 'prefer-right'
| 'neither'
| 'no-preference'
/** Callback invoked with the choice the user made. */
export type BinaryFeedbackChoose = (choice: BinaryFeedbackChoice) => void
type BinaryFeedbackConfig = {
  // Compared against Math.random() in shouldUseBinaryFeedback; 0 disables sampling.
  sampleFrequency: number
}

/**
 * Reads the 'tengu-binary-feedback-config' dynamic config, supplying a
 * default with a sample frequency of 0.
 */
async function getBinaryFeedbackStatsigConfig(): Promise<BinaryFeedbackConfig> {
  const fallback: BinaryFeedbackConfig = { sampleFrequency: 0 }
  const config = await getDynamicConfig('tengu-binary-feedback-config', fallback)
  return config
}
/**
 * Summarizes a message as the ordered list of its content-block labels:
 * the tool name for `tool_use` blocks, and the block's `type` for everything
 * else (e.g. 'text', 'thinking', 'redacted_thinking').
 */
function getMessageBlockSequence(m: AssistantMessage) {
  const labels: string[] = []
  for (const block of m.message.content) {
    // Text and any other non-tool block are labelled by their type.
    labels.push(block.type === 'tool_use' ? block.name : block.type)
  }
  return labels
}
/**
 * Logs a 'tengu_binary_feedback' event describing which of two candidate
 * messages the user chose, together with the current git state, both models,
 * the query temperature and each message's block sequence.
 *
 * @param m1 The left candidate (reported with the "A" suffix).
 * @param m2 The right candidate (reported with the "B" suffix).
 * @param choice The user's selection.
 */
export async function logBinaryFeedbackEvent(
  m1: AssistantMessage,
  m2: AssistantMessage,
  choice: BinaryFeedbackChoice,
): Promise<void> {
  const { model: modelA } = m1.message
  const { model: modelB } = m2.message
  const gitState = await getGitState()

  // Only an explicit left/right preference names a winning message id.
  const chosenId =
    choice === 'prefer-left'
      ? m1.message.id
      : choice === 'prefer-right'
        ? m2.message.id
        : undefined
  // Both candidates are reported with the same temperature constant.
  const temperature = String(MAIN_QUERY_TEMPERATURE)

  logEvent('tengu_binary_feedback', {
    msg_id_A: m1.message.id,
    msg_id_B: m2.message.id,
    choice: chosenId,
    choiceStr: choice,
    gitHead: gitState?.commitHash,
    gitBranch: gitState?.branchName,
    gitRepoRemoteUrl: gitState?.remoteUrl || undefined,
    gitRepoIsHeadOnRemote: gitState?.isHeadOnRemote?.toString(),
    gitRepoIsClean: gitState?.isClean?.toString(),
    modelA,
    modelB,
    temperatureA: temperature,
    temperatureB: temperature,
    seqA: String(getMessageBlockSequence(m1)),
    seqB: String(getMessageBlockSequence(m2)),
  })
}
/**
 * Logs a 'tengu_binary_feedback_sampling_decision' event.
 *
 * @param decision Whether binary feedback was sampled for this turn.
 * @param reason Optional short tag explaining the decision.
 */
export async function logBinaryFeedbackSamplingDecision(
  decision: boolean,
  reason?: string,
): Promise<void> {
  const decisionLabel = decision.toString()
  logEvent('tengu_binary_feedback_sampling_decision', {
    decision: decisionLabel,
    reason,
  })
}
/**
 * Logs a 'tengu_binary_feedback_display_decision' event for a message pair.
 *
 * @param decision Whether the pair will be shown for comparison.
 * @param m1 First candidate (reported with the "A" suffix).
 * @param m2 Second candidate (reported with the "B" suffix).
 * @param reason Optional short tag explaining the decision.
 */
export async function logBinaryFeedbackDisplayDecision(
  decision: boolean,
  m1: AssistantMessage,
  m2: AssistantMessage,
  reason?: string,
): Promise<void> {
  const decisionLabel = decision.toString()
  const sequenceA = String(getMessageBlockSequence(m1))
  const sequenceB = String(getMessageBlockSequence(m2))
  logEvent('tengu_binary_feedback_display_decision', {
    decision: decisionLabel,
    reason,
    msg_id_A: m1.message.id,
    msg_id_B: m2.message.id,
    seqA: sequenceA,
    seqB: sequenceB,
  })
}
/** Two text blocks are equal when their `text` strings are strictly equal. */
function textContentBlocksEqual(cb1: TextBlock, cb2: TextBlock): boolean {
  const { text: leftText } = cb1
  const { text: rightText } = cb2
  return leftText === rightText
}
/**
 * Compares two content blocks.
 *
 * Blocks of different types are never equal. Text blocks compare by text;
 * otherwise the blocks are treated as tool uses and compare by tool name and
 * deep-equal input.
 */
function contentBlocksEqual(
  cb1: TextBlock | ToolUseBlock,
  cb2: TextBlock | ToolUseBlock,
): boolean {
  if (cb1.type === 'text') {
    return cb2.type === 'text' ? textContentBlocksEqual(cb1, cb2) : false
  }
  if (cb2.type !== cb1.type) {
    return false
  }
  const other = cb2 as ToolUseBlock
  const sameTool = cb1.name === other.name
  return sameTool && isEqual(cb1.input, other.input)
}
/**
 * Returns true when both lists have the same length and every pair of blocks
 * at the same position is equal according to `contentBlocksEqual`.
 */
function allContentBlocksEqual(
  content1: (TextBlock | ToolUseBlock)[],
  content2: (TextBlock | ToolUseBlock)[],
): boolean {
  const sameLength = content1.length === content2.length
  if (!sameLength) {
    return false
  }
  // Lengths match, so neither side of a zipped pair can be undefined.
  for (const [left, right] of zip(content1, content2)) {
    if (!contentBlocksEqual(left!, right!)) {
      return false
    }
  }
  return true
}
/**
 * Decides whether this turn should sample two responses for binary feedback,
 * logging every decision together with its reason.
 *
 * Checks run in order: DISABLE_BINARY_FEEDBACK, FORCE_BINARY_FEEDBACK,
 * USER_TYPE, NODE_ENV, and finally the configured sample frequency.
 */
export async function shouldUseBinaryFeedback(): Promise<boolean> {
  // Logs the decision and hands it back so each check is a single return.
  const record = (decision: boolean, reason?: string): boolean => {
    logBinaryFeedbackSamplingDecision(decision, reason)
    return decision
  }

  if (process.env.DISABLE_BINARY_FEEDBACK) {
    return record(false, 'disabled_by_env_var')
  }
  if (process.env.FORCE_BINARY_FEEDBACK) {
    return record(true, 'forced_by_env_var')
  }
  if (process.env.USER_TYPE !== 'ant') {
    return record(false, 'not_ant')
  }
  if (process.env.NODE_ENV === 'test') {
    // Binary feedback breaks a couple tests related to checking for permission,
    // so we have to disable it in tests at the risk of hiding bugs
    return record(false, 'test')
  }

  const { sampleFrequency } = await getBinaryFeedbackStatsigConfig()
  if (sampleFrequency === 0) {
    return record(false, 'top_level_frequency_zero')
  }
  const roll = Math.random()
  if (roll > sampleFrequency) {
    return record(false, 'top_level_frequency_rng')
  }
  return record(true)
}
/**
 * Decides whether two candidate messages differ enough to be worth showing
 * side by side, and logs the decision.
 *
 * Thinking blocks are ignored. If either message uses a tool, only the tool
 * uses are compared; otherwise all remaining blocks are compared. Returns
 * false (reason 'contents_identical') when the compared blocks are equal.
 */
export function messagePairValidForBinaryFeedback(
  m1: AssistantMessage,
  m2: AssistantMessage,
): boolean {
  // Ignore thinking blocks, on the assumption that users don't find them very relevant
  // compared to other content types
  const visibleBlocks1 = m1.message.content.filter(
    b => b.type !== 'thinking' && b.type !== 'redacted_thinking',
  )
  const visibleBlocks2 = m2.message.content.filter(
    b => b.type !== 'thinking' && b.type !== 'redacted_thinking',
  )

  const hasToolUse = [...visibleBlocks1, ...visibleBlocks2].some(
    b => b.type === 'tool_use',
  )

  // If there are tools, they're the most material difference between the messages,
  // so only the tool uses are compared and text is ignored. If they're all text
  // blocks, compare those.
  const identical = hasToolUse
    ? allContentBlocksEqual(
        visibleBlocks1.filter(b => b.type === 'tool_use'),
        visibleBlocks2.filter(b => b.type === 'tool_use'),
      )
    : allContentBlocksEqual(visibleBlocks1, visibleBlocks2)

  if (identical) {
    logBinaryFeedbackDisplayDecision(false, m1, m2, 'contents_identical')
    return false
  }
  logBinaryFeedbackDisplayDecision(true, m1, m2)
  return true
}
/**
 * Translates the user's choice into the result handed back to the caller.
 *
 * - 'prefer-left' / 'prefer-right': that message, with the permission check skipped.
 * - 'no-preference': one of the two messages picked at random, permission check kept.
 * - 'neither': no message, permission check kept.
 */
export function getBinaryFeedbackResultForChoice(
  m1: AssistantMessage,
  m2: AssistantMessage,
  choice: BinaryFeedbackChoice,
): BinaryFeedbackResult {
  switch (choice) {
    case 'neither': {
      return { message: null, shouldSkipPermissionCheck: false }
    }
    case 'no-preference': {
      const coinFlipPicksLeft = Math.random() < 0.5
      const picked = coinFlipPicksLeft ? m1 : m2
      return { message: picked, shouldSkipPermissionCheck: false }
    }
    case 'prefer-right': {
      return { message: m2, shouldSkipPermissionCheck: true }
    }
    case 'prefer-left': {
      return { message: m1, shouldSkipPermissionCheck: true }
    }
  }
}