diff --git a/devolutions-session/Cargo.toml b/devolutions-session/Cargo.toml index 5865bddf8..d03abd4b1 100644 --- a/devolutions-session/Cargo.toml +++ b/devolutions-session/Cargo.toml @@ -44,7 +44,7 @@ win-api-wrappers = { path = "../crates/win-api-wrappers", optional = true } [dependencies.now-proto-pdu] optional = true -version = "0.4.2" +# TODO: wait for merge and release https://github.com/Devolutions/now-proto/pull/62 features = ["std"] [target.'cfg(windows)'.dependencies] @@ -58,6 +58,7 @@ version = "0.61" optional = true features = [ "Win32_Foundation", + "Win32_Globalization", "Win32_System_Shutdown", "Win32_UI_Accessibility", "Win32_UI_WindowsAndMessaging", diff --git a/devolutions-session/src/dvc/encoding.rs b/devolutions-session/src/dvc/encoding.rs new file mode 100644 index 000000000..0a1d9e685 --- /dev/null +++ b/devolutions-session/src/dvc/encoding.rs @@ -0,0 +1,437 @@ +//! Console encoding transcoding for IO redirection. +//! +//! When IO redirection is enabled, the child process writes its stdout/stderr using +//! the console's OEM codepage (e.g., `cmd.exe`, `powershell.exe`, `pwsh.exe`). This module +//! provides transcoding between the process's native encoding and UTF-8, which is +//! the encoding used on the wire (NowProto). + +use std::borrow::Cow; + +use tracing::warn; +use windows::Win32::Globalization::{ + MB_ERR_INVALID_CHARS, MultiByteToWideChar, WC_NO_BEST_FIT_CHARS, WideCharToMultiByte, +}; + +/// The encoding used for IO data streams. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DataEncoding { + /// Raw passthrough. No transcoding is performed. + Raw, + /// A Windows codepage that requires transcoding to/from UTF-8. + Codepage(u32), +} + +const CP_UTF8: u32 = 65001; + +impl DataEncoding { + /// Determine the OEM codepage encoding for the current system. + /// + /// Console applications (`cmd.exe`, `powershell.exe` 5.x, `pwsh.exe`) use the OEM + /// codepage for piped/redirected output. 
+    pub fn from_oem_codepage() -> Self {
+        // SAFETY: FFI call without outstanding preconditions.
+        let cp = unsafe { windows::Win32::Globalization::GetOEMCP() };
+        // An OEM codepage that already is UTF-8 needs no transcoding at all.
+        if cp == CP_UTF8 { Self::Raw } else { Self::Codepage(cp) }
+    }
+
+    /// Returns true if no transcoding is needed.
+    pub fn is_raw(self) -> bool {
+        matches!(self, Self::Raw)
+    }
+
+    /// Convert a UTF-8 string to bytes in this encoding.
+    ///
+    /// Returns borrowed bytes when no transcoding is needed (raw passthrough).
+    pub fn encode_str<'a>(self, text: &'a str) -> Cow<'a, [u8]> {
+        match self {
+            Self::Raw => Cow::Borrowed(text.as_bytes()),
+            Self::Codepage(cp) => Cow::Owned(convert_from_utf8(cp, text)),
+        }
+    }
+}
+
+/// Stateful decoder that transcodes from a Windows codepage to UTF-8.
+///
+/// Handles partial multi-byte characters that may be split across read chunks
+/// (relevant for DBCS codepages like Shift-JIS, GBK, etc.).
+pub struct OutputDecoder {
+    encoding: DataEncoding,
+    /// Leftover bytes from the previous chunk that form an incomplete multi-byte character.
+    leftover: Vec<u8>,
+}
+
+impl OutputDecoder {
+    /// Create a decoder for `encoding` with nothing buffered.
+    pub fn new(encoding: DataEncoding) -> Self {
+        Self {
+            encoding,
+            leftover: Vec::new(),
+        }
+    }
+
+    /// Decode a chunk of bytes from the process encoding to UTF-8.
+    ///
+    /// Returns borrowed data when no transcoding is needed (raw passthrough).
+    /// Any incomplete trailing multi-byte character is buffered internally and
+    /// will be completed by the next call. Input that cannot be decoded even
+    /// after holding back trailing bytes is converted lossily.
+    pub fn decode<'a>(&mut self, data: &'a [u8]) -> Cow<'a, [u8]> {
+        // Raw passthrough: hand the caller's buffer back untouched.
+        let DataEncoding::Codepage(codepage) = self.encoding else {
+            return Cow::Borrowed(data);
+        };
+
+        // Prepend any leftover bytes from the previous chunk.
+        let input = if self.leftover.is_empty() {
+            Cow::Borrowed(data)
+        } else {
+            let mut combined = std::mem::take(&mut self.leftover);
+            combined.extend_from_slice(data);
+            Cow::Owned(combined)
+        };
+
+        if input.is_empty() {
+            return Cow::Owned(Vec::new());
+        }
+
+        // Try to convert the entire buffer. If the last bytes form an incomplete
+        // multi-byte character, we'll detect that and retry without those trailing bytes.
+        match convert_to_utf8(codepage, &input) {
+            Ok(utf8_bytes) => Cow::Owned(utf8_bytes),
+            Err(_) => {
+                // The conversion failed, likely due to an incomplete multi-byte sequence
+                // at the end. Try progressively shorter slices to find the boundary.
+                // A DBCS codepage can leave only one dangling lead byte; up to 4 trailing
+                // bytes are held back as a conservative bound for longer sequences.
+                let max_trim = input.len().min(4);
+                for trim in 1..=max_trim {
+                    let end = input.len() - trim;
+                    if end == 0 {
+                        // Everything is leftover (very short input).
+                        self.leftover = input.into_owned();
+                        return Cow::Owned(Vec::new());
+                    }
+                    if let Ok(utf8_bytes) = convert_to_utf8(codepage, &input[..end]) {
+                        self.leftover = input[end..].to_vec();
+                        return Cow::Owned(utf8_bytes);
+                    }
+                }
+
+                // If nothing works, the data is genuinely malformed.
+                warn!(
+                    codepage,
+                    "Failed to decode process output; data may contain invalid characters"
+                );
+                self.leftover.clear();
+                Cow::Owned(convert_to_utf8_lossy(codepage, &input))
+            }
+        }
+    }
+
+    /// Flush any remaining leftover bytes (call at EOF).
+    ///
+    /// If there are incomplete bytes remaining, they are converted lossily.
+    /// Returns an empty vector when nothing is buffered or no transcoding is active.
+    pub fn flush(&mut self) -> Vec<u8> {
+        let DataEncoding::Codepage(codepage) = self.encoding else {
+            return Vec::new();
+        };
+
+        if self.leftover.is_empty() {
+            return Vec::new();
+        }
+
+        let leftover = std::mem::take(&mut self.leftover);
+        convert_to_utf8_lossy(codepage, &leftover)
+    }
+}
+
+/// Stateful encoder that transcodes from UTF-8 to a Windows codepage.
+///
+/// Handles partial UTF-8 sequences that may be split across write chunks.
+pub struct InputEncoder {
+    encoding: DataEncoding,
+    /// Leftover bytes from the previous chunk that form an incomplete UTF-8 sequence.
+    leftover: Vec<u8>,
+}
+
+impl InputEncoder {
+    /// Create an encoder for `encoding` with nothing buffered.
+    pub fn new(encoding: DataEncoding) -> Self {
+        Self {
+            encoding,
+            leftover: Vec::new(),
+        }
+    }
+
+    /// Encode a chunk of UTF-8 bytes to the process encoding.
+    ///
+    /// Returns borrowed data when no transcoding is needed (raw passthrough).
+    /// Any incomplete trailing UTF-8 sequence is buffered internally and will
+    /// be completed by the next call. Genuinely invalid sequences are replaced
+    /// with U+FFFD instead of being buffered, so a single bad byte can neither
+    /// stall the stream nor make the internal buffer grow without bound.
+    pub fn encode<'a>(&mut self, data: &'a [u8]) -> Cow<'a, [u8]> {
+        // Raw passthrough: hand the caller's buffer back untouched.
+        let DataEncoding::Codepage(codepage) = self.encoding else {
+            return Cow::Borrowed(data);
+        };
+
+        // Prepend any leftover bytes from the previous chunk.
+        let input = if self.leftover.is_empty() {
+            Cow::Borrowed(data)
+        } else {
+            let mut combined = std::mem::take(&mut self.leftover);
+            combined.extend_from_slice(data);
+            Cow::Owned(combined)
+        };
+
+        if input.is_empty() {
+            return Cow::Owned(Vec::new());
+        }
+
+        let mut text = String::with_capacity(input.len());
+        let mut rest: &[u8] = &input;
+        let mut invalid_sequences = 0usize;
+
+        loop {
+            // Take the longest valid UTF-8 prefix as-is. The prefix is valid, so
+            // `from_utf8_lossy` performs no substitution here.
+            let valid_end = find_valid_utf8_end(rest);
+            text.push_str(&String::from_utf8_lossy(&rest[..valid_end]));
+            rest = &rest[valid_end..];
+
+            // Whatever follows the valid prefix is empty, incomplete, or invalid.
+            match std::str::from_utf8(rest).map_err(|error| error.error_len()) {
+                // Nothing left to process.
+                Ok(_) => break,
+                // The input ends in the middle of a sequence: keep it for the next call.
+                Err(None) => {
+                    self.leftover = rest.to_vec();
+                    break;
+                }
+                // Invalid sequence: substitute it and carry on with the remaining bytes.
+                Err(Some(invalid_len)) => {
+                    text.push(char::REPLACEMENT_CHARACTER);
+                    rest = &rest[invalid_len..];
+                    invalid_sequences += 1;
+                }
+            }
+        }
+
+        if invalid_sequences > 0 {
+            warn!(
+                invalid_sequences,
+                "Invalid UTF-8 in stdin data; replaced with U+FFFD"
+            );
+        }
+
+        // An empty `text` (only an incomplete sequence was received) yields an empty buffer.
+        Cow::Owned(convert_from_utf8(codepage, &text))
+    }
+
+    /// Flush any remaining leftover bytes (call when stdin is closed).
+    pub fn flush(&mut self) -> Vec<u8> {
+        let DataEncoding::Codepage(codepage) = self.encoding else {
+            return Vec::new();
+        };
+
+        if self.leftover.is_empty() {
+            return Vec::new();
+        }
+
+        let leftover = std::mem::take(&mut self.leftover);
+
+        // Try to interpret as UTF-8 with replacement.
+        let utf8_str = String::from_utf8_lossy(&leftover);
+        convert_from_utf8(codepage, &utf8_str)
+    }
+}
+
+/// Find the end index of the longest valid UTF-8 prefix in `data`.
+///
+/// Returns `data.len()` when the whole slice is valid UTF-8. Otherwise returns
+/// the index of the first byte that is not part of valid UTF-8, whether that byte
+/// starts an incomplete trailing sequence or is genuinely invalid. Callers that
+/// need to tell the two cases apart must inspect the remainder themselves.
+fn find_valid_utf8_end(data: &[u8]) -> usize {
+    match std::str::from_utf8(data) {
+        Ok(_) => data.len(),
+        // `valid_up_to()` is the length of the valid prefix for both error kinds.
+        Err(e) => e.valid_up_to(),
+    }
+}
+
+/// Convert bytes from a Windows codepage to UTF-8 using Win32 API.
+///
+/// Returns `Err` if the Win32 conversion rejects the input, which is how an
+/// incomplete trailing multi-byte character (or any other invalid sequence) shows up.
+fn convert_to_utf8(codepage: u32, data: &[u8]) -> Result<Vec<u8>, ()> {
+    if data.is_empty() {
+        return Ok(Vec::new());
+    }
+
+    // First pass: get required buffer size for UTF-16 conversion.
+    // Using MB_ERR_INVALID_CHARS to detect incomplete sequences.
+    // SAFETY: `data` is a valid byte slice.
+    let wide_len = unsafe { MultiByteToWideChar(codepage, MB_ERR_INVALID_CHARS, data, None) };
+
+    if wide_len <= 0 {
+        return Err(());
+    }
+
+    #[expect(clippy::cast_sign_loss, reason = "wide_len is verified positive above")]
+    let wide_len = wide_len as usize;
+
+    // Second pass: perform the actual conversion.
+    let mut wide_buf = vec![0u16; wide_len];
+
+    // SAFETY: `wide_buf` is properly sized and `data` is valid.
+    let written = unsafe { MultiByteToWideChar(codepage, MB_ERR_INVALID_CHARS, data, Some(&mut wide_buf)) };
+
+    if written <= 0 {
+        return Err(());
+    }
+
+    #[expect(clippy::cast_sign_loss, reason = "written is verified positive above")]
+    wide_buf.truncate(written as usize);
+
+    // Convert UTF-16 to UTF-8.
+    Ok(String::from_utf16_lossy(&wide_buf).into_bytes())
+}
+
+/// Convert bytes from a Windows codepage to UTF-8, replacing invalid characters.
+///
+/// The result is always valid UTF-8: if the Win32 conversion itself fails, the
+/// input bytes are sanitised with `String::from_utf8_lossy` rather than being
+/// forwarded untouched.
+fn convert_to_utf8_lossy(codepage: u32, data: &[u8]) -> Vec<u8> {
+    if data.is_empty() {
+        return Vec::new();
+    }
+
+    // Last-resort path when the Win32 API refuses the conversion altogether.
+    // ASCII passes through unchanged; anything that is not valid UTF-8 becomes U+FFFD.
+    let sanitized_passthrough = || String::from_utf8_lossy(data).into_owned().into_bytes();
+
+    // First pass: get required buffer size (without MB_ERR_INVALID_CHARS for lossy conversion).
+    // SAFETY: `data` is a valid byte slice.
+    let wide_len = unsafe { MultiByteToWideChar(codepage, Default::default(), data, None) };
+
+    if wide_len <= 0 {
+        return sanitized_passthrough();
+    }
+
+    #[expect(clippy::cast_sign_loss, reason = "wide_len is verified positive above")]
+    let wide_len = wide_len as usize;
+
+    let mut wide_buf = vec![0u16; wide_len];
+
+    // SAFETY: `wide_buf` is properly sized and `data` is valid.
+    let written = unsafe { MultiByteToWideChar(codepage, Default::default(), data, Some(&mut wide_buf)) };
+
+    if written <= 0 {
+        return sanitized_passthrough();
+    }
+
+    #[expect(clippy::cast_sign_loss, reason = "written is verified positive above")]
+    wide_buf.truncate(written as usize);
+
+    String::from_utf16_lossy(&wide_buf).into_bytes()
+}
+
+/// Convert a UTF-8 string to a Windows codepage.
+fn convert_from_utf8(codepage: u32, text: &str) -> Vec<u8> {
+    if text.is_empty() {
+        return Vec::new();
+    }
+
+    // First convert UTF-8 to UTF-16.
+    let wide: Vec<u16> = text.encode_utf16().collect();
+
+    // Get required buffer size.
+    // SAFETY: `wide` is a valid UTF-16 slice.
+ let mb_len = unsafe { WideCharToMultiByte(codepage, WC_NO_BEST_FIT_CHARS, &wide, None, None, None) }; + + if mb_len <= 0 { + return text.as_bytes().to_vec(); + } + + #[expect(clippy::cast_sign_loss, reason = "mb_len is verified positive above")] + let mb_len = mb_len as usize; + + let mut mb_buf = vec![0u8; mb_len]; + + // SAFETY: `mb_buf` is properly sized, `wide` is valid UTF-16. + let written = unsafe { WideCharToMultiByte(codepage, WC_NO_BEST_FIT_CHARS, &wide, Some(&mut mb_buf), None, None) }; + + if written <= 0 { + return text.as_bytes().to_vec(); + } + + #[expect(clippy::cast_sign_loss, reason = "written is verified positive above")] + mb_buf.truncate(written as usize); + mb_buf +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn raw_passthrough_decode() { + let mut decoder = OutputDecoder::new(DataEncoding::Raw); + let input = b"Hello, world!"; + let result = decoder.decode(input); + assert!(matches!(result, Cow::Borrowed(_))); + assert_eq!(&*result, input); + } + + #[test] + fn raw_passthrough_encode() { + let mut encoder = InputEncoder::new(DataEncoding::Raw); + let input = b"Hello, world!"; + let result = encoder.encode(input); + assert!(matches!(result, Cow::Borrowed(_))); + assert_eq!(&*result, input); + } + + #[test] + fn ascii_subset_works_for_any_codepage() { + // ASCII characters (0x00-0x7F) are the same in all codepages. 
+ let encoding = DataEncoding::Codepage(437); + let mut decoder = OutputDecoder::new(encoding); + let input = b"Hello, world!\r\n"; + let output = decoder.decode(input); + assert_eq!(&*output, input.as_slice()); + } + + #[test] + fn encoder_ascii_subset_works_for_any_codepage() { + let encoding = DataEncoding::Codepage(437); + let mut encoder = InputEncoder::new(encoding); + let input = b"Hello, world!\r\n"; + let output = encoder.encode(input); + assert_eq!(&*output, input.as_slice()); + } + + #[test] + fn split_utf8_input_handled() { + let mut encoder = InputEncoder::new(DataEncoding::Codepage(437)); + + // é in UTF-8 is [0xC3, 0xA9]. Split across two chunks. + let chunk1 = &[0xC3u8]; + let chunk2 = &[0xA9u8]; + + let out1 = encoder.encode(chunk1); + // First chunk should produce nothing (incomplete UTF-8 sequence). + assert!(out1.is_empty()); + + let out2 = encoder.encode(chunk2); + // Second chunk should produce the encoded character. + assert!(!out2.is_empty()); + } + + #[test] + fn find_valid_utf8_end_complete() { + let data = "Hello".as_bytes(); + assert_eq!(find_valid_utf8_end(data), 5); + } + + #[test] + fn find_valid_utf8_end_incomplete() { + // "é" is [0xC3, 0xA9]. If we only have the lead byte: + let data = &[b'H', b'i', 0xC3]; + assert_eq!(find_valid_utf8_end(data), 2); + } +} diff --git a/devolutions-session/src/dvc/fs.rs b/devolutions-session/src/dvc/fs.rs index f3539b9f7..2c609c6e3 100644 --- a/devolutions-session/src/dvc/fs.rs +++ b/devolutions-session/src/dvc/fs.rs @@ -2,6 +2,8 @@ use std::path::PathBuf; use tracing::error; +use crate::dvc::encoding::DataEncoding; + /// Guard for created temporary file. Associated file is deleted on drop. pub struct TmpFileGuard(PathBuf); @@ -22,6 +24,23 @@ impl TmpFileGuard { Ok(()) } + /// Write content transcoded from UTF-8 to the specified encoding. 
+ pub fn write_content_encoded(&self, content: &str, encoding: DataEncoding) -> anyhow::Result<()> { + let bytes = encoding.encode_str(content); + std::fs::write(&self.0, &*bytes)?; + Ok(()) + } + + /// Write content as UTF-8 with a BOM prefix (for Windows PowerShell 5.x script files). + pub fn write_content_utf8_bom(&self, content: &str) -> anyhow::Result<()> { + use std::io::Write as _; + + let mut file = std::fs::File::create(&self.0)?; + file.write_all(b"\xEF\xBB\xBF")?; + file.write_all(content.as_bytes())?; + Ok(()) + } + pub fn path(&self) -> &PathBuf { &self.0 } diff --git a/devolutions-session/src/dvc/mod.rs b/devolutions-session/src/dvc/mod.rs index 911696eff..728d10ac5 100644 --- a/devolutions-session/src/dvc/mod.rs +++ b/devolutions-session/src/dvc/mod.rs @@ -34,6 +34,7 @@ //! session result with error to the client (if possible). pub mod channel; +pub mod encoding; pub mod fs; pub mod io; pub mod now_message_dissector; diff --git a/devolutions-session/src/dvc/process.rs b/devolutions-session/src/dvc/process.rs index 148bbb8bf..c7c56f3fa 100644 --- a/devolutions-session/src/dvc/process.rs +++ b/devolutions-session/src/dvc/process.rs @@ -22,6 +22,7 @@ use windows::Win32::UI::WindowsAndMessaging::{SW_HIDE, WM_QUIT}; use windows::core::PCWSTR; use crate::dvc::channel::{WinapiSignaledReceiver, WinapiSignaledSender, winapi_signaled_mpsc_channel}; +use crate::dvc::encoding::{DataEncoding, InputEncoder, OutputDecoder}; use crate::dvc::env::make_environment_block; use crate::dvc::fs::TmpFileGuard; use crate::dvc::io::{IoRedirectionPipes, ensure_overlapped_io_result}; @@ -104,6 +105,8 @@ pub struct WinApiProcessCtx { stderr_read_pipe: Option, stdin_write_pipe: Option, + encoding: DataEncoding, + pid: u32, // NOTE: Order of fields is important, as process_handle must be dropped last in automatically @@ -215,7 +218,12 @@ impl WinApiProcessCtx { ) -> Result { let session_id = self.session_id; - info!(session_id, "Process IO redirection loop has started"); + 
info!(session_id, ?self.encoding, "Process IO redirection loop has started"); + + // Encoding transcoders for stdin/stdout/stderr. + let mut stdout_decoder = OutputDecoder::new(self.encoding); + let mut stderr_decoder = OutputDecoder::new(self.encoding); + let mut stdin_encoder = InputEncoder::new(self.encoding); // Events for overlapped IO let stdout_read_event = Event::new_unnamed()?; @@ -322,14 +330,47 @@ impl WinApiProcessCtx { } }; - let mut bytes_written: u32 = 0; - - // Send data to stdin pipe in a blocking maner. - // SAFETY: pipe is valid to write to, as long as it is not closed. - unsafe { WriteFile(pipe_handle, Some(&data), Some(&mut bytes_written as *mut _), None) }?; + // Transcode UTF-8 input to the process encoding. + let encoded_data = stdin_encoder.encode(&data); + + if !encoded_data.is_empty() { + let mut bytes_written: u32 = 0; + + // Send data to stdin pipe in a blocking manner. + // SAFETY: pipe and input buffer are ensured to be valid via + // borrowing rules. + unsafe { + WriteFile( + pipe_handle, + Some(&*encoded_data), + Some(&mut bytes_written as *mut _), + None, + ) + }?; + } if last { - // Close stdin pipe + // Flush any remaining encoder state before closing. + let flushed = stdin_encoder.flush(); + if !flushed.is_empty() { + let mut bytes_written: u32 = 0; + // SAFETY: pipe and input buffer are ensured to be valid via + // borrowing rules. + unsafe { + WriteFile( + self.stdin_write_pipe + .as_ref() + .expect("BUG: stdin pipe closed unexpectedly") + .handle + .raw(), + Some(&flushed), + Some(&mut bytes_written as *mut _), + None, + ) + }?; + } + + // Close stdin pipe. self.stdin_write_pipe = None; } } @@ -371,6 +412,17 @@ impl WinApiProcessCtx { // EOF on stdout pipe, close it and send EOF message to message_tx self.stdout_read_pipe = None; + // Flush any remaining decoder state at EOF. 
+ let flushed = stdout_decoder.flush(); + if !flushed.is_empty() { + io_notification_tx.blocking_send(ServerChannelEvent::SessionDataOut { + session_id, + stream: NowExecDataStreamKind::Stdout, + last: false, + data: flushed, + })?; + } + io_notification_tx.blocking_send(ServerChannelEvent::SessionDataOut { session_id, stream: NowExecDataStreamKind::Stdout, @@ -383,12 +435,18 @@ impl WinApiProcessCtx { continue; } - io_notification_tx.blocking_send(ServerChannelEvent::SessionDataOut { - session_id, - stream: NowExecDataStreamKind::Stdout, - last: false, - data: stdout_buffer[..bytes_read as usize].to_vec(), - })?; + // Transcode stdout from process encoding to UTF-8. + let raw_len = bytes_read as usize; + let decoded_data = stdout_decoder.decode(&stdout_buffer[..raw_len]); + + if !decoded_data.is_empty() { + io_notification_tx.blocking_send(ServerChannelEvent::SessionDataOut { + session_id, + stream: NowExecDataStreamKind::Stdout, + last: false, + data: decoded_data.into_owned(), + })?; + } // Schedule next overlapped read // SAFETY: pipe is valid to read from, as long as it is not closed. @@ -432,6 +490,18 @@ impl WinApiProcessCtx { ERROR_HANDLE_EOF | ERROR_BROKEN_PIPE => { // EOF on stderr pipe, close it and send EOF message to message_tx self.stderr_read_pipe = None; + + // Flush any remaining decoder state at EOF. 
+ let flushed = stderr_decoder.flush(); + if !flushed.is_empty() { + io_notification_tx.blocking_send(ServerChannelEvent::SessionDataOut { + session_id, + stream: NowExecDataStreamKind::Stderr, + last: false, + data: flushed, + })?; + } + io_notification_tx.blocking_send(ServerChannelEvent::SessionDataOut { session_id, stream: NowExecDataStreamKind::Stderr, @@ -444,12 +514,17 @@ impl WinApiProcessCtx { continue; } - io_notification_tx.blocking_send(ServerChannelEvent::SessionDataOut { - session_id, - stream: NowExecDataStreamKind::Stderr, - last: false, - data: stderr_buffer[..bytes_read as usize].to_vec(), - })?; + // Transcode stderr from process encoding to UTF-8. + let decoded_data = stderr_decoder.decode(&stderr_buffer[..bytes_read as usize]); + + if !decoded_data.is_empty() { + io_notification_tx.blocking_send(ServerChannelEvent::SessionDataOut { + session_id, + stream: NowExecDataStreamKind::Stderr, + last: false, + data: decoded_data.into_owned(), + })?; + } // Schedule next overlapped read // SAFETY: pipe is valid to read from, as long as it is not closed. 
@@ -479,6 +554,7 @@ pub struct WinApiProcessBuilder { command_line: String, current_directory: String, enable_io_redirection: bool, + encoding: DataEncoding, env: HashMap, temp_files: Vec, } @@ -490,6 +566,7 @@ impl WinApiProcessBuilder { command_line: String::new(), current_directory: String::new(), enable_io_redirection: false, + encoding: DataEncoding::from_oem_codepage(), env: HashMap::new(), temp_files: Vec::new(), } @@ -525,6 +602,12 @@ impl WinApiProcessBuilder { self } + #[must_use] + pub fn with_encoding(mut self, encoding: DataEncoding) -> Self { + self.encoding = encoding; + self + } + fn run_impl( mut self, session_id: u32, @@ -553,11 +636,12 @@ impl WinApiProcessBuilder { let temp_files = std::mem::take(&mut self.temp_files); let io_redirection = self.enable_io_redirection; + let encoding = self.encoding; let process_ctx = if io_redirection { - prepare_process_with_io_redirection(session_id, command_line, current_directory, self.env)? + prepare_process_with_io_redirection(session_id, command_line, current_directory, self.env, encoding)? } else { - prepare_process(session_id, command_line, current_directory, self.env)? + prepare_process(session_id, command_line, current_directory, self.env, encoding)? 
}; // For detached mode, spawn a thread that waits for process exit and keeps temp files alive @@ -636,6 +720,7 @@ fn prepare_process( mut command_line: WideString, current_directory: WideString, env: HashMap, + encoding: DataEncoding, ) -> Result { let mut process_information = PROCESS_INFORMATION::default(); @@ -689,6 +774,7 @@ fn prepare_process( stdout_read_pipe: None, stderr_read_pipe: None, stdin_write_pipe: None, + encoding, pid, process: process_handle, }) @@ -699,6 +785,7 @@ fn prepare_process_with_io_redirection( mut command_line: WideString, current_directory: WideString, env: HashMap, + encoding: DataEncoding, ) -> Result { let mut process_information = PROCESS_INFORMATION::default(); @@ -771,6 +858,7 @@ fn prepare_process_with_io_redirection( stdout_read_pipe: Some(stdout_read_pipe), stderr_read_pipe: Some(stderr_read_pipe), stdin_write_pipe: Some(stdin_write_pipe), + encoding, pid, process: process_handle, }; diff --git a/devolutions-session/src/dvc/task.rs b/devolutions-session/src/dvc/task.rs index f1eb21275..21e47cba8 100644 --- a/devolutions-session/src/dvc/task.rs +++ b/devolutions-session/src/dvc/task.rs @@ -36,6 +36,7 @@ use windows::Win32::UI::WindowsAndMessaging::{ use windows::core::PCWSTR; use crate::dvc::channel::{WinapiSignaledSender, bounded_mpsc_channel, winapi_signaled_mpsc_channel}; +use crate::dvc::encoding::DataEncoding; use crate::dvc::fs::TmpFileGuard; use crate::dvc::io::run_dvc_io; use crate::dvc::process::{ExecError, ServerChannelEvent, WinApiProcess, WinApiProcessBuilder}; @@ -282,7 +283,8 @@ fn default_server_caps() -> NowChannelCapsetMsg { | NowExecCapsetFlags::STYLE_BATCH | NowExecCapsetFlags::STYLE_PWSH | NowExecCapsetFlags::STYLE_WINPS - | NowExecCapsetFlags::IO_REDIRECTION, + | NowExecCapsetFlags::IO_REDIRECTION + | NowExecCapsetFlags::UNICODE_CONSOLE, ) } @@ -617,7 +619,15 @@ impl MessageProcessor { async fn process_exec_process(&mut self, exec_msg: NowExecProcessMsg<'_>) -> Result<(), ExecError> { 
self.ensure_session_id_free(exec_msg.session_id()).await?; - let mut run_process = WinApiProcessBuilder::new(exec_msg.filename()); + // Process exec: no assumptions about encoding by default (raw passthrough). + // When ENCODING_UTF8 is set, transcode IO between OEM codepage and UTF-8. + let io_encoding = if exec_msg.is_encoding_utf8() { + DataEncoding::from_oem_codepage() + } else { + DataEncoding::Raw + }; + + let mut run_process = WinApiProcessBuilder::new(exec_msg.filename()).with_encoding(io_encoding); if let Some(parameters) = exec_msg.parameters() { run_process = run_process.with_command_line(parameters); @@ -646,23 +656,36 @@ impl MessageProcessor { async fn process_exec_batch(&mut self, batch_msg: NowExecBatchMsg<'_>) -> Result<(), ExecError> { self.ensure_session_id_free(batch_msg.session_id()).await?; + let mut script = batch_msg.command().to_owned(); + + // Batch exec encoding logic: + // - Default: transcode OEM ↔ UTF-8 + // - RAW_ENCODING: pass through raw bytes without transcoding. + // - UNICODE_CONSOLE: inject `@chcp 65001`, write file as BOM-less UTF-8, IO passthrough. + let (file_encoding, io_encoding) = if batch_msg.is_unicode_console() { + script = format!("@chcp 65001 > nul\r\n{}", script); + (DataEncoding::Raw, DataEncoding::Raw) + } else if batch_msg.is_raw_encoding() { + (DataEncoding::from_oem_codepage(), DataEncoding::Raw) + } else { + (DataEncoding::from_oem_codepage(), DataEncoding::from_oem_codepage()) + }; + let tmp_file = TmpFileGuard::new("bat")?; - tmp_file.write_content(batch_msg.command())?; + tmp_file.write_content_encoded(&script, file_encoding)?; - // "/Q" - Turns command echo off. - // "/C" - Carries out the command specified by string and then terminates. 
let parameters = format!("/Q /C \"{}\"", tmp_file.path_string()); let mut run_batch = WinApiProcessBuilder::new("cmd.exe") .with_temp_file(tmp_file) - .with_command_line(¶meters); + .with_command_line(¶meters) + .with_encoding(io_encoding); if let Some(directory) = batch_msg.directory() { run_batch = run_batch.with_current_directory(directory); } if batch_msg.is_detached() { - // Detached mode: fire-and-forget, no IO redirection run_batch.run_detached(batch_msg.session_id())?; self.send_detached_process_success(batch_msg.session_id()).await?; return Ok(()); @@ -681,28 +704,41 @@ impl MessageProcessor { self.ensure_session_id_free(winps_msg.session_id()).await?; let mut params = Vec::new(); - append_ps_args(&mut params, &winps_msg); + // WinPs exec encoding logic: + // - Script file is ALWAYS written as UTF-8 with BOM (PS 5.x needs BOM to detect UTF-8). + // - Default IO: transcode OEM ↔ UTF-8 + // - RAW_ENCODING or UNICODE_CONSOLE: pass through raw bytes (UTF-8 passthrough). + let io_encoding = if winps_msg.is_unicode_console() || winps_msg.is_raw_encoding() { + DataEncoding::Raw + } else { + DataEncoding::from_oem_codepage() + }; + let tmp_file = if winps_msg.is_server_mode() { - // IMPORTANT: It is absolutely necessary to pass "-s" as the last parameter to make - // PowerShell run in server mode. params.push("-s".to_owned()); None } else { + let mut script = winps_msg.command().to_owned(); + if winps_msg.is_unicode_console() { + // Inject UTF-8 encoding setup for Windows PowerShell. + script = format!( + "$OutputEncoding = [Console]::InputEncoding = [Console]::OutputEncoding = [System.Text.UTF8Encoding]::new()\r\n{}", + script + ); + } let tmp_file = TmpFileGuard::new("ps1")?; - tmp_file.write_content(winps_msg.command())?; - - // "-Command" runs script without command echo and terminates. 
+ tmp_file.write_content_utf8_bom(&script)?; params.push("-Command".to_owned()); params.push(format!("\"{}\"", tmp_file.path_string())); - Some(tmp_file) }; let params_str = params.join(" "); - - let mut run_process = WinApiProcessBuilder::new("powershell.exe").with_command_line(¶ms_str); + let mut run_process = WinApiProcessBuilder::new("powershell.exe") + .with_command_line(¶ms_str) + .with_encoding(io_encoding); if let Some(tmp_file) = tmp_file { run_process = run_process.with_temp_file(tmp_file); @@ -713,7 +749,6 @@ impl MessageProcessor { } if winps_msg.is_detached() { - // Detached mode: fire-and-forget, no IO redirection run_process.run_detached(winps_msg.session_id())?; self.send_detached_process_success(winps_msg.session_id()).await?; return Ok(()); @@ -728,55 +763,66 @@ impl MessageProcessor { Ok(()) } - async fn process_exec_pwsh(&mut self, winps_msg: NowExecPwshMsg<'_>) -> Result<(), ExecError> { - self.ensure_session_id_free(winps_msg.session_id()).await?; + async fn process_exec_pwsh(&mut self, pwsh_msg: NowExecPwshMsg<'_>) -> Result<(), ExecError> { + self.ensure_session_id_free(pwsh_msg.session_id()).await?; let mut params = Vec::new(); + append_pwsh_args(&mut params, &pwsh_msg); + + // Pwsh exec encoding logic (mirrors winps behavior): + // - Script file is written as plain UTF-8 (pwsh reads UTF-8 natively, no BOM needed). + // - Default IO: transcode OEM ↔ UTF-8 (pwsh still uses OEM encoding for console). + // - RAW_ENCODING or UNICODE_CONSOLE: pass through raw bytes (UTF-8 passthrough). + let io_encoding = if pwsh_msg.is_unicode_console() || pwsh_msg.is_raw_encoding() { + DataEncoding::Raw + } else { + DataEncoding::from_oem_codepage() + }; - append_pwsh_args(&mut params, &winps_msg); - - let tmp_file = if winps_msg.is_server_mode() { - // IMPORTANT: It is absolutely necessary to pass "-s" as the last parameter to make - // PowerShell run in server mode. 
+ let tmp_file = if pwsh_msg.is_server_mode() { params.push("-s".to_owned()); None } else { + let mut script = pwsh_msg.command().to_owned(); + if pwsh_msg.is_unicode_console() { + // Inject UTF-8 encoding setup for PowerShell 7+. + script = format!( + "$OutputEncoding = [Console]::InputEncoding = [Console]::OutputEncoding = [System.Text.UTF8Encoding]::new()\r\n{}", + script + ); + } let tmp_file = TmpFileGuard::new("ps1")?; - tmp_file.write_content(winps_msg.command())?; - - // "-Command" runs script without command echo and terminates. + tmp_file.write_content_encoded(&script, DataEncoding::Raw)?; params.push("-Command".to_owned()); params.push(format!("\"{}\"", tmp_file.path_string())); - Some(tmp_file) }; let params_str = params.join(" "); - let mut run_process = WinApiProcessBuilder::new("pwsh.exe") .with_command_line(¶ms_str) - .with_env("NO_COLOR", "1"); // Suppress ANSI escape codes in pwsh output. + .with_encoding(io_encoding) + .with_env("NO_COLOR", "1"); if let Some(tmp_file) = tmp_file { run_process = run_process.with_temp_file(tmp_file); } - if let Some(directory) = winps_msg.directory() { + if let Some(directory) = pwsh_msg.directory() { run_process = run_process.with_current_directory(directory); } - if winps_msg.is_detached() { - // Detached mode: fire-and-forget, no IO redirection - run_process.run_detached(winps_msg.session_id())?; - self.send_detached_process_success(winps_msg.session_id()).await?; + if pwsh_msg.is_detached() { + run_process.run_detached(pwsh_msg.session_id())?; + self.send_detached_process_success(pwsh_msg.session_id()).await?; return Ok(()); } let process = run_process - .with_io_redirection(winps_msg.is_with_io_redirection()) - .run(winps_msg.session_id(), self.io_notification_tx.clone())?; + .with_io_redirection(pwsh_msg.is_with_io_redirection()) + .run(pwsh_msg.session_id(), self.io_notification_tx.clone())?; - self.sessions.insert(winps_msg.session_id(), process); + self.sessions.insert(pwsh_msg.session_id(), process); 
Ok(()) }