55 * For incoming text that doesn't need mention filtering, use sanitize_incoming_text.cjs instead.
66 */
77
8- const { sanitizeContentCore, getRedactedDomains, clearRedactedDomains, writeRedactedDomainsLog, extractDomainsFromUrl, addRedactedDomain } = require ( "./sanitize_content_core.cjs" ) ;
8+ const {
9+ sanitizeContentCore,
10+ getRedactedDomains,
11+ clearRedactedDomains,
12+ writeRedactedDomainsLog,
13+ buildAllowedDomains,
14+ sanitizeUrlProtocols,
15+ sanitizeUrlDomains,
16+ neutralizeCommands,
17+ removeXmlComments,
18+ convertXmlTags,
19+ neutralizeBotTriggers,
20+ applyTruncation,
21+ } = require ( "./sanitize_content_core.cjs" ) ;
922
1023/**
1124 * @typedef {Object } SanitizeOptions
@@ -40,46 +53,25 @@ function sanitizeContent(content, maxLengthOrOptions) {
4053 }
4154
4255 // If allowed aliases are specified, we need custom mention filtering
43- // We'll do most of the sanitization with the core, then apply selective mention filtering
56+ // We'll apply the same sanitization pipeline but with selective mention filtering
4457
4558 if ( ! content || typeof content !== "string" ) {
4659 return "" ;
4760 }
4861
49- // Read allowed domains from environment variable
50- const allowedDomainsEnv = process . env . GH_AW_ALLOWED_DOMAINS ;
51- const defaultAllowedDomains = [ "github.com" , "github.io" , "githubusercontent.com" , "githubassets.com" , "github.dev" , "codespaces.new" ] ;
52-
53- let allowedDomains = allowedDomainsEnv
54- ? allowedDomainsEnv
55- . split ( "," )
56- . map ( d => d . trim ( ) )
57- . filter ( d => d )
58- : defaultAllowedDomains ;
59-
60- // Extract and add GitHub domains from GitHub context URLs
61- const githubServerUrl = process . env . GITHUB_SERVER_URL ;
62- const githubApiUrl = process . env . GITHUB_API_URL ;
63-
64- if ( githubServerUrl ) {
65- const serverDomains = extractDomainsFromUrl ( githubServerUrl ) ;
66- allowedDomains = allowedDomains . concat ( serverDomains ) ;
67- }
68-
69- if ( githubApiUrl ) {
70- const apiDomains = extractDomainsFromUrl ( githubApiUrl ) ;
71- allowedDomains = allowedDomains . concat ( apiDomains ) ;
72- }
73-
74- // Remove duplicates
75- allowedDomains = [ ...new Set ( allowedDomains ) ] ;
62+ // Build list of allowed domains (shared with core)
63+ const allowedDomains = buildAllowedDomains ( ) ;
7664
7765 let sanitized = content ;
7866
67+ // Remove ANSI escape sequences and control characters early
68+ sanitized = sanitized . replace ( / \x1b \[ [ 0 - 9 ; ] * [ m G K H ] / g, "" ) ;
69+ sanitized = sanitized . replace ( / [ \x00 - \x08 \x0B \x0C \x0E - \x1F \x7F ] / g, "" ) ;
70+
7971 // Neutralize commands at the start of text
8072 sanitized = neutralizeCommands ( sanitized ) ;
8173
82- // Neutralize @mentions with selective filtering
74+ // Neutralize @mentions with selective filtering (custom logic for allowed aliases)
8375 sanitized = neutralizeMentions ( sanitized , allowedAliasesLowercase ) ;
8476
8577 // Remove XML comments
@@ -88,138 +80,18 @@ function sanitizeContent(content, maxLengthOrOptions) {
8880 // Convert XML tags
8981 sanitized = convertXmlTags ( sanitized ) ;
9082
91- // Remove ANSI escape sequences
92- sanitized = sanitized . replace ( / \x1b \[ [ 0 - 9 ; ] * [ m G K H ] / g, "" ) ;
93-
94- // Remove control characters
95- sanitized = sanitized . replace ( / [ \x00 - \x08 \x0B \x0C \x0E - \x1F \x7F ] / g, "" ) ;
96-
97- // URI filtering
83+ // URI filtering (shared with core)
9884 sanitized = sanitizeUrlProtocols ( sanitized ) ;
9985 sanitized = sanitizeUrlDomains ( sanitized , allowedDomains ) ;
10086
101- // Truncation
102- const lines = sanitized . split ( "\n" ) ;
103- const maxLines = 65000 ;
104- maxLength = maxLength || 524288 ;
105-
106- if ( lines . length > maxLines ) {
107- const truncationMsg = "\n[Content truncated due to line count]" ;
108- const truncatedLines = lines . slice ( 0 , maxLines ) . join ( "\n" ) + truncationMsg ;
109-
110- if ( truncatedLines . length > maxLength ) {
111- sanitized = truncatedLines . substring ( 0 , maxLength - truncationMsg . length ) + truncationMsg ;
112- } else {
113- sanitized = truncatedLines ;
114- }
115- } else if ( sanitized . length > maxLength ) {
116- sanitized = sanitized . substring ( 0 , maxLength ) + "\n[Content truncated due to length]" ;
117- }
87+ // Apply truncation limits (shared with core)
88+ sanitized = applyTruncation ( sanitized , maxLength ) ;
11889
11990 // Neutralize bot triggers
12091 sanitized = neutralizeBotTriggers ( sanitized ) ;
12192
12293 return sanitized . trim ( ) ;
12394
124- /**
125- * Sanitize URL domains
126- * @param {string } s - The string to process
127- * @param {string[] } allowed - Allowed domains
128- * @returns {string } Sanitized string
129- */
130- function sanitizeUrlDomains ( s , allowed ) {
131- const httpsUrlRegex = / h t t p s : \/ \/ ( [ \w . - ] + (?: : \d + ) ? ) ( \/ [ ^ \s ] * ) ? / gi;
132-
133- const result = s . replace ( httpsUrlRegex , ( match , hostnameWithPort , pathPart ) => {
134- const hostname = hostnameWithPort . split ( ":" ) [ 0 ] . toLowerCase ( ) ;
135- pathPart = pathPart || "" ;
136-
137- const isAllowed = allowed . some ( allowedDomain => {
138- const normalizedAllowed = allowedDomain . toLowerCase ( ) ;
139-
140- if ( hostname === normalizedAllowed ) {
141- return true ;
142- }
143-
144- if ( normalizedAllowed . startsWith ( "*." ) ) {
145- const baseDomain = normalizedAllowed . substring ( 2 ) ;
146- return hostname . endsWith ( "." + baseDomain ) || hostname === baseDomain ;
147- }
148-
149- return hostname . endsWith ( "." + normalizedAllowed ) ;
150- } ) ;
151-
152- if ( isAllowed ) {
153- return match ;
154- } else {
155- const truncated = hostname . length > 12 ? hostname . substring ( 0 , 12 ) + "..." : hostname ;
156- if ( typeof core !== "undefined" && core . info ) {
157- core . info ( `Redacted URL: ${ truncated } ` ) ;
158- }
159- if ( typeof core !== "undefined" && core . debug ) {
160- core . debug ( `Redacted URL (full): ${ match } ` ) ;
161- }
162- addRedactedDomain ( hostname ) ;
163- return "(redacted)" ;
164- }
165- } ) ;
166-
167- return result ;
168- }
169-
170- /**
171- * Sanitize URL protocols
172- * @param {string } s - The string to process
173- * @returns {string } Sanitized string
174- */
175- function sanitizeUrlProtocols ( s ) {
176- return s . replace ( / \b ( (?: h t t p | f t p | f i l e | s s h | g i t ) : \/ \/ ( [ \w . - ] + ) (?: [ ^ \s ] * ) | (?: d a t a | j a v a s c r i p t | v b s c r i p t | a b o u t | m a i l t o | t e l ) : [ ^ \s ] + ) / gi, ( match , _fullMatch , domain ) => {
177- // Extract domain for http/ftp/file/ssh/git protocols
178- if ( domain ) {
179- const domainLower = domain . toLowerCase ( ) ;
180- const truncated = domainLower . length > 12 ? domainLower . substring ( 0 , 12 ) + "..." : domainLower ;
181- if ( typeof core !== "undefined" && core . info ) {
182- core . info ( `Redacted URL: ${ truncated } ` ) ;
183- }
184- if ( typeof core !== "undefined" && core . debug ) {
185- core . debug ( `Redacted URL (full): ${ match } ` ) ;
186- }
187- addRedactedDomain ( domainLower ) ;
188- } else {
189- // For other protocols (data:, javascript:, etc.), track the protocol itself
190- const protocolMatch = match . match ( / ^ ( [ ^ : ] + ) : / ) ;
191- if ( protocolMatch ) {
192- const protocol = protocolMatch [ 1 ] + ":" ;
193- // Truncate the matched URL for logging (keep first 12 chars + "...")
194- const truncated = match . length > 12 ? match . substring ( 0 , 12 ) + "..." : match ;
195- if ( typeof core !== "undefined" && core . info ) {
196- core . info ( `Redacted URL: ${ truncated } ` ) ;
197- }
198- if ( typeof core !== "undefined" && core . debug ) {
199- core . debug ( `Redacted URL (full): ${ match } ` ) ;
200- }
201- addRedactedDomain ( protocol ) ;
202- }
203- }
204- return "(redacted)" ;
205- } ) ;
206- }
207-
208- /**
209- * Neutralize commands
210- * @param {string } s - The string to process
211- * @returns {string } Processed string
212- */
213- function neutralizeCommands ( s ) {
214- const commandName = process . env . GH_AW_COMMAND ;
215- if ( ! commandName ) {
216- return s ;
217- }
218-
219- const escapedCommand = commandName . replace ( / [ . * + ? ^ $ { } ( ) | [ \] \\ ] / g, "\\$&" ) ;
220- return s . replace ( new RegExp ( `^(\\s*)/(${ escapedCommand } )\\b` , "i" ) , "$1`/$2`" ) ;
221- }
222-
22395 /**
22496 * Neutralize @mentions with selective filtering
22597 * @param {string } s - The string to process
@@ -240,49 +112,6 @@ function sanitizeContent(content, maxLengthOrOptions) {
240112 return `${ p1 } \`@${ p2 } \`` ; // Neutralize the mention
241113 } ) ;
242114 }
243-
244- /**
245- * Remove XML comments
246- * @param {string } s - The string to process
247- * @returns {string } Processed string
248- */
249- function removeXmlComments ( s ) {
250- return s . replace ( / < ! - - [ \s \S ] * ?- - > / g, "" ) . replace ( / < ! - - [ \s \S ] * ?- - ! > / g, "" ) ;
251- }
252-
253- /**
254- * Convert XML tags
255- * @param {string } s - The string to process
256- * @returns {string } Processed string
257- */
258- function convertXmlTags ( s ) {
259- const allowedTags = [ "b" , "blockquote" , "br" , "code" , "details" , "em" , "h1" , "h2" , "h3" , "h4" , "h5" , "h6" , "hr" , "i" , "li" , "ol" , "p" , "pre" , "strong" , "sub" , "summary" , "sup" , "table" , "tbody" , "td" , "th" , "thead" , "tr" , "ul" ] ;
260-
261- s = s . replace ( / < ! \[ C D A T A \[ ( [ \s \S ] * ?) \] \] > / g, ( match , content ) => {
262- const convertedContent = content . replace ( / < ( \/ ? [ A - Z a - z ] [ A - Z a - z 0 - 9 ] * (?: [ ^ > ] * ?) ) > / g, "($1)" ) ;
263- return `(![CDATA[${ convertedContent } ]])` ;
264- } ) ;
265-
266- return s . replace ( / < ( \/ ? [ A - Z a - z ! ] [ ^ > ] * ?) > / g, ( match , tagContent ) => {
267- const tagNameMatch = tagContent . match ( / ^ \/ ? \s * ( [ A - Z a - z ] [ A - Z a - z 0 - 9 ] * ) / ) ;
268- if ( tagNameMatch ) {
269- const tagName = tagNameMatch [ 1 ] . toLowerCase ( ) ;
270- if ( allowedTags . includes ( tagName ) ) {
271- return match ;
272- }
273- }
274- return `(${ tagContent } )` ;
275- } ) ;
276- }
277-
278- /**
279- * Neutralize bot triggers
280- * @param {string } s - The string to process
281- * @returns {string } Processed string
282- */
283- function neutralizeBotTriggers ( s ) {
284- return s . replace ( / \b ( f i x e s ? | c l o s e s ? | r e s o l v e s ? | f i x | c l o s e | r e s o l v e ) \s + # ( \w + ) / gi, ( match , action , ref ) => `\`${ action } #${ ref } \`` ) ;
285- }
286115}
287116
288117module . exports = { sanitizeContent, getRedactedDomains, clearRedactedDomains, writeRedactedDomainsLog } ;
0 commit comments