feat: encode SFTP paths on Windows in a revised WTF-8 (#3238)

This commit is contained in:
三咲雅 misaki masa 2025-10-13 19:50:46 +08:00 committed by GitHub
parent 554cb52cc5
commit c68e2df8c0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
81 changed files with 1393 additions and 459 deletions

View file

@ -14,6 +14,3 @@ parking_lot = { workspace = true }
russh = { workspace = true }
serde = { workspace = true }
tokio = { workspace = true }
[target."cfg(windows)".dependencies]
os_str_bytes = { version = "7.1.1", default-features = false, features = [ "conversions" ] }

View file

@ -2,6 +2,8 @@ use std::{borrow::Cow, ffi::{OsStr, OsString}, ops::Deref, path::{Path, PathBuf}
use serde::{Deserialize, Serialize};
use crate::Error;
/// A byte string that either borrows (`Cow::Borrowed`) or owns (`Cow::Owned`) its bytes.
///
/// The SFTP request types store their path fields as `ByteStr`.
#[derive(Debug, Default, Deserialize, Serialize)]
pub struct ByteStr<'a>(Cow<'a, [u8]>);
@ -19,32 +21,6 @@ impl<'a> From<&'a ByteStr<'a>> for ByteStr<'a> {
fn from(value: &'a ByteStr) -> Self { ByteStr(Cow::Borrowed(&value.0)) }
}
impl<'a> From<&'a OsStr> for ByteStr<'a> {
fn from(value: &'a OsStr) -> Self {
#[cfg(unix)]
{
use std::os::unix::ffi::OsStrExt;
ByteStr(Cow::Borrowed(value.as_bytes()))
}
#[cfg(windows)]
{
use os_str_bytes::OsStrBytes;
ByteStr(value.to_raw_bytes())
}
}
}
impl<'a> From<&'a Path> for ByteStr<'a> {
fn from(value: &'a Path) -> Self { ByteStr::from(value.as_os_str()) }
}
impl<'a, T> From<&'a T> for ByteStr<'a>
where
T: AsRef<Path>,
{
fn from(value: &'a T) -> Self { Self::from(value.as_ref()) }
}
impl PartialEq<&str> for ByteStr<'_> {
    /// Compares the raw bytes of this value with the UTF-8 bytes of `other`.
    fn eq(&self, other: &&str) -> bool {
        let lhs: &[u8] = &self.0;
        lhs == other.as_bytes()
    }
}
@ -58,8 +34,7 @@ impl<'a> ByteStr<'a> {
}
#[cfg(windows)]
{
use os_str_bytes::OsStrBytes;
OsStr::assert_from_raw_bytes(self.0.as_ref())
super::wtf::bytes_to_wide(self.0.as_ref())
}
}
@ -71,8 +46,10 @@ impl<'a> ByteStr<'a> {
}
#[cfg(windows)]
{
use os_str_bytes::OsStrBytes;
OsStr::assert_from_raw_bytes(self.0).into_owned()
match super::wtf::bytes_to_wide(self.0.as_ref()) {
Cow::Borrowed(_) => unsafe { String::from_utf8_unchecked(self.0.into_owned()) }.into(),
Cow::Owned(s) => s,
}
}
}
@ -107,3 +84,31 @@ impl<'a> ByteStr<'a> {
Self(Cow::Borrowed(bytes))
}
}
// --- Traits
/// Fallible conversion of a value into a [`ByteStr`].
///
/// Unlike `Into<ByteStr>`, the conversion can fail: the implementation for
/// path-like values returns an error on Windows when the path cannot be
/// converted to bytes.
pub trait ToByteStr<'a> {
/// Consumes `self` and returns its byte-string form, or an [`Error`] if
/// the value cannot be converted.
fn to_byte_str(self) -> Result<ByteStr<'a>, Error>;
}
impl<'a, T> ToByteStr<'a> for &'a T
where
    T: AsRef<Path> + ?Sized,
{
    /// Converts a borrowed path-like value into a [`ByteStr`].
    ///
    /// On Unix the raw `OsStr` bytes are borrowed unchanged, so this never
    /// fails. On Windows the path is passed through `wtf::wide_to_bytes`.
    ///
    /// # Errors
    ///
    /// On Windows, returns an error when `wide_to_bytes` yields `None`.
    fn to_byte_str(self) -> Result<ByteStr<'a>, Error> {
        #[cfg(unix)]
        {
            use std::os::unix::ffi::OsStrExt;
            Ok(ByteStr(Cow::Borrowed(self.as_ref().as_os_str().as_bytes())))
        }
        #[cfg(windows)]
        {
            // `ok_or_else` keeps the error from being constructed on the
            // success path, which is the common case.
            super::wtf::wide_to_bytes(self.as_ref().as_os_str())
                .map(ByteStr)
                .ok_or_else(|| Error::custom("failed to convert wide path to bytes"))
        }
    }
}
impl<'a> ToByteStr<'a> for &'a ByteStr<'a> {
fn to_byte_str(self) -> Result<ByteStr<'a>, Error> { Ok(ByteStr(Cow::Borrowed(&self.0))) }
}

View file

@ -13,6 +13,8 @@ mod operator;
mod packet;
mod ser;
mod session;
#[cfg(windows)]
mod wtf;
pub use byte_str::*;
pub(crate) use de::*;

View file

@ -3,7 +3,7 @@ use std::{ops::Deref, path::PathBuf, sync::Arc};
use russh::{ChannelStream, client::Msg};
use tokio::sync::oneshot;
use crate::{ByteStr, Error, Packet, Session, fs::{Attrs, File, Flags, ReadDir}, requests, responses};
use crate::{ByteStr, Error, Packet, Session, ToByteStr, fs::{Attrs, File, Flags, ReadDir}, requests, responses};
pub struct Operator(Arc<Session>);
@ -28,9 +28,9 @@ impl Operator {
pub async fn open<'a, P>(&self, path: P, flags: Flags, attrs: &'a Attrs) -> Result<File, Error>
where
P: Into<ByteStr<'a>>,
P: ToByteStr<'a>,
{
let handle: responses::Handle = self.send(requests::Open::new(path, flags, attrs)).await?;
let handle: responses::Handle = self.send(requests::Open::new(path, flags, attrs)?).await?;
Ok(File::new(&self.0, handle.handle))
}
@ -59,9 +59,9 @@ impl Operator {
pub async fn lstat<'a, P>(&self, path: P) -> Result<Attrs, Error>
where
P: Into<ByteStr<'a>>,
P: ToByteStr<'a>,
{
let attrs: responses::Attrs = self.send(requests::Lstat::new(path)).await?;
let attrs: responses::Attrs = self.send(requests::Lstat::new(path)?).await?;
Ok(attrs.attrs)
}
@ -72,9 +72,9 @@ impl Operator {
pub async fn setstat<'a, P>(&self, path: P, attrs: Attrs) -> Result<(), Error>
where
P: Into<ByteStr<'a>>,
P: ToByteStr<'a>,
{
let status: responses::Status = self.send(requests::SetStat::new(path, attrs)).await?;
let status: responses::Status = self.send(requests::SetStat::new(path, attrs)?).await?;
status.into()
}
@ -83,42 +83,45 @@ impl Operator {
status.into()
}
pub async fn read_dir<'a>(&'a self, dir: impl Into<ByteStr<'a>>) -> Result<ReadDir, Error> {
let dir: ByteStr = dir.into();
let handle: responses::Handle = self.send(requests::OpenDir::new(&dir)).await?;
pub async fn read_dir<'a, P>(&'a self, dir: P) -> Result<ReadDir, Error>
where
P: ToByteStr<'a>,
{
let dir: ByteStr = dir.to_byte_str()?;
let handle: responses::Handle = self.send(requests::OpenDir::new(&dir)?).await?;
Ok(ReadDir::new(&self.0, dir, handle.handle))
}
pub async fn remove<'a, P>(&self, path: P) -> Result<(), Error>
where
P: Into<ByteStr<'a>>,
P: ToByteStr<'a>,
{
let status: responses::Status = self.send(requests::Remove::new(path)).await?;
let status: responses::Status = self.send(requests::Remove::new(path)?).await?;
status.into()
}
pub async fn mkdir<'a, P>(&self, path: P, attrs: Attrs) -> Result<(), Error>
where
P: Into<ByteStr<'a>>,
P: ToByteStr<'a>,
{
let status: responses::Status = self.send(requests::Mkdir::new(path, attrs)).await?;
let status: responses::Status = self.send(requests::Mkdir::new(path, attrs)?).await?;
status.into()
}
pub async fn rmdir<'a, P>(&self, path: P) -> Result<(), Error>
where
P: Into<ByteStr<'a>>,
P: ToByteStr<'a>,
{
let status: responses::Status = self.send(requests::Rmdir::new(path)).await?;
let status: responses::Status = self.send(requests::Rmdir::new(path)?).await?;
status.into()
}
pub async fn realpath<'a, P>(&self, path: P) -> Result<PathBuf, Error>
where
P: Into<ByteStr<'a>>,
P: ToByteStr<'a>,
{
let mut name: responses::Name = self.send(requests::Realpath::new(path)).await?;
let mut name: responses::Name = self.send(requests::Realpath::new(path)?).await?;
if name.items.is_empty() {
Err(Error::custom("realpath returned no names"))
} else {
@ -128,26 +131,26 @@ impl Operator {
pub async fn stat<'a, P>(&self, path: P) -> Result<Attrs, Error>
where
P: Into<ByteStr<'a>>,
P: ToByteStr<'a>,
{
let attrs: responses::Attrs = self.send(requests::Stat::new(path)).await?;
let attrs: responses::Attrs = self.send(requests::Stat::new(path)?).await?;
Ok(attrs.attrs)
}
pub async fn rename<'a, F, T>(&self, from: F, to: T) -> Result<(), Error>
where
F: Into<ByteStr<'a>>,
T: Into<ByteStr<'a>>,
F: ToByteStr<'a>,
T: ToByteStr<'a>,
{
let status: responses::Status = self.send(requests::Rename::new(from, to)).await?;
let status: responses::Status = self.send(requests::Rename::new(from, to)?).await?;
status.into()
}
pub async fn readlink<'a, P>(&self, path: P) -> Result<PathBuf, Error>
where
P: Into<ByteStr<'a>>,
P: ToByteStr<'a>,
{
let mut name: responses::Name = self.send(requests::Readlink::new(path)).await?;
let mut name: responses::Name = self.send(requests::Readlink::new(path)?).await?;
if name.items.is_empty() {
Err(Error::custom("readlink returned no names"))
} else {
@ -157,10 +160,10 @@ impl Operator {
pub async fn symlink<'a, L, O>(&self, original: O, link: L) -> Result<(), Error>
where
O: Into<ByteStr<'a>>,
L: Into<ByteStr<'a>>,
O: ToByteStr<'a>,
L: ToByteStr<'a>,
{
let status: responses::Status = self.send(requests::Symlink::new(original, link)).await?;
let status: responses::Status = self.send(requests::Symlink::new(original, link)?).await?;
status.into()
}
@ -175,14 +178,14 @@ impl Operator {
pub async fn hardlink<'a, O, L>(&self, original: O, link: L) -> Result<(), Error>
where
O: Into<ByteStr<'a>>,
L: Into<ByteStr<'a>>,
O: ToByteStr<'a>,
L: ToByteStr<'a>,
{
if self.extensions.lock().get("hardlink@openssh.com").is_none_or(|s| s != "1") {
return Err(Error::Unsupported);
}
let data = requests::ExtendedHardlink::new(original, link);
let data = requests::ExtendedHardlink::new(original, link)?;
let status: responses::Status =
self.send(requests::Extended::new("hardlink@openssh.com", data)).await?;
status.into()

View file

@ -2,7 +2,7 @@ use std::{borrow::Cow, fmt::Debug};
use serde::{Deserialize, Serialize};
use crate::ByteStr;
use crate::{ByteStr, Error, ToByteStr};
#[derive(Debug, Deserialize, Serialize)]
pub struct Extended<'a, D> {
@ -49,12 +49,12 @@ pub struct ExtendedHardlink<'a> {
}
impl<'a> ExtendedHardlink<'a> {
pub fn new<O, L>(original: O, link: L) -> Self
pub fn new<O, L>(original: O, link: L) -> Result<Self, Error>
where
O: Into<ByteStr<'a>>,
L: Into<ByteStr<'a>>,
O: ToByteStr<'a>,
L: ToByteStr<'a>,
{
Self { original: original.into(), link: link.into() }
Ok(Self { original: original.to_byte_str()?, link: link.to_byte_str()? })
}
}

View file

@ -1,6 +1,6 @@
use serde::{Deserialize, Serialize};
use crate::ByteStr;
use crate::{ByteStr, Error, ToByteStr};
#[derive(Debug, Deserialize, Serialize)]
pub struct Lstat<'a> {
@ -9,7 +9,12 @@ pub struct Lstat<'a> {
}
impl Lstat<'_> {
pub fn new<'a>(path: impl Into<ByteStr<'a>>) -> Lstat<'a> { Lstat { id: 0, path: path.into() } }
pub fn new<'a, P>(path: P) -> Result<Lstat<'a>, Error>
where
P: ToByteStr<'a>,
{
Ok(Lstat { id: 0, path: path.to_byte_str()? })
}
pub fn len(&self) -> usize { size_of_val(&self.id) + 4 + self.path.len() }
}

View file

@ -1,6 +1,6 @@
use serde::{Deserialize, Serialize};
use crate::{ByteStr, fs::Attrs};
use crate::{ByteStr, Error, ToByteStr, fs::Attrs};
#[derive(Debug, Deserialize, Serialize)]
pub struct Mkdir<'a> {
@ -10,11 +10,11 @@ pub struct Mkdir<'a> {
}
impl<'a> Mkdir<'a> {
pub fn new<P>(path: P, attrs: Attrs) -> Self
pub fn new<P>(path: P, attrs: Attrs) -> Result<Self, Error>
where
P: Into<ByteStr<'a>>,
P: ToByteStr<'a>,
{
Self { id: 0, path: path.into(), attrs }
Ok(Self { id: 0, path: path.to_byte_str()?, attrs })
}
pub fn len(&self) -> usize { size_of_val(&self.id) + 4 + self.path.len() + self.attrs.len() }

View file

@ -2,7 +2,7 @@ use std::borrow::Cow;
use serde::{Deserialize, Serialize};
use crate::{ByteStr, fs::{Attrs, Flags}};
use crate::{ByteStr, Error, ToByteStr, fs::{Attrs, Flags}};
#[derive(Debug, Deserialize, Serialize)]
pub struct Open<'a> {
@ -13,11 +13,11 @@ pub struct Open<'a> {
}
impl<'a> Open<'a> {
pub fn new<P>(path: P, flags: Flags, attrs: &'a Attrs) -> Self
pub fn new<P>(path: P, flags: Flags, attrs: &'a Attrs) -> Result<Self, Error>
where
P: Into<ByteStr<'a>>,
P: ToByteStr<'a>,
{
Self { id: 0, path: path.into(), flags, attrs: Cow::Borrowed(attrs) }
Ok(Self { id: 0, path: path.to_byte_str()?, flags, attrs: Cow::Borrowed(attrs) })
}
pub fn len(&self) -> usize {

View file

@ -1,6 +1,6 @@
use serde::{Deserialize, Serialize};
use crate::ByteStr;
use crate::{ByteStr, Error, ToByteStr};
#[derive(Debug, Deserialize, Serialize)]
pub struct OpenDir<'a> {
@ -9,7 +9,12 @@ pub struct OpenDir<'a> {
}
impl<'a> OpenDir<'a> {
pub fn new(path: impl Into<ByteStr<'a>>) -> Self { Self { id: 0, path: path.into() } }
pub fn new<P>(path: P) -> Result<Self, Error>
where
P: ToByteStr<'a>,
{
Ok(Self { id: 0, path: path.to_byte_str()? })
}
pub fn len(&self) -> usize { size_of_val(&self.id) + 4 + self.path.len() }
}

View file

@ -1,6 +1,6 @@
use serde::{Deserialize, Serialize};
use crate::ByteStr;
use crate::{ByteStr, Error, ToByteStr};
#[derive(Debug, Deserialize, Serialize)]
pub struct Readlink<'a> {
@ -9,7 +9,12 @@ pub struct Readlink<'a> {
}
impl<'a> Readlink<'a> {
pub fn new(path: impl Into<ByteStr<'a>>) -> Self { Self { id: 0, path: path.into() } }
pub fn new<P>(path: P) -> Result<Self, Error>
where
P: ToByteStr<'a>,
{
Ok(Self { id: 0, path: path.to_byte_str()? })
}
pub fn len(&self) -> usize { size_of_val(&self.id) + 4 + self.path.len() }
}

View file

@ -1,6 +1,6 @@
use serde::{Deserialize, Serialize};
use crate::ByteStr;
use crate::{ByteStr, Error, ToByteStr};
#[derive(Debug, Deserialize, Serialize)]
pub struct Realpath<'a> {
@ -9,7 +9,12 @@ pub struct Realpath<'a> {
}
impl<'a> Realpath<'a> {
pub fn new(path: impl Into<ByteStr<'a>>) -> Self { Self { id: 0, path: path.into() } }
pub fn new<P>(path: P) -> Result<Self, Error>
where
P: ToByteStr<'a>,
{
Ok(Self { id: 0, path: path.to_byte_str()? })
}
pub fn len(&self) -> usize { size_of_val(&self.id) + 4 + self.path.len() }
}

View file

@ -1,6 +1,6 @@
use serde::{Deserialize, Serialize};
use crate::ByteStr;
use crate::{ByteStr, Error, ToByteStr};
#[derive(Debug, Deserialize, Serialize)]
pub struct Remove<'a> {
@ -9,7 +9,12 @@ pub struct Remove<'a> {
}
impl<'a> Remove<'a> {
pub fn new(path: impl Into<ByteStr<'a>>) -> Self { Self { id: 0, path: path.into() } }
pub fn new<P>(path: P) -> Result<Self, Error>
where
P: ToByteStr<'a>,
{
Ok(Self { id: 0, path: path.to_byte_str()? })
}
pub fn len(&self) -> usize { size_of_val(&self.id) + 4 + self.path.len() }
}

View file

@ -1,6 +1,6 @@
use serde::{Deserialize, Serialize};
use crate::ByteStr;
use crate::{ByteStr, Error, ToByteStr};
#[derive(Debug, Deserialize, Serialize)]
pub struct Rename<'a> {
@ -10,12 +10,12 @@ pub struct Rename<'a> {
}
impl<'a> Rename<'a> {
pub fn new<F, T>(from: F, to: T) -> Self
pub fn new<F, T>(from: F, to: T) -> Result<Self, Error>
where
F: Into<ByteStr<'a>>,
T: Into<ByteStr<'a>>,
F: ToByteStr<'a>,
T: ToByteStr<'a>,
{
Self { id: 0, from: from.into(), to: to.into() }
Ok(Self { id: 0, from: from.to_byte_str()?, to: to.to_byte_str()? })
}
pub fn len(&self) -> usize { size_of_val(&self.id) + 4 + self.from.len() + 4 + self.to.len() }

View file

@ -1,6 +1,6 @@
use serde::{Deserialize, Serialize};
use crate::ByteStr;
use crate::{ByteStr, Error, ToByteStr};
#[derive(Debug, Deserialize, Serialize)]
pub struct Rmdir<'a> {
@ -9,7 +9,12 @@ pub struct Rmdir<'a> {
}
impl<'a> Rmdir<'a> {
pub fn new(path: impl Into<ByteStr<'a>>) -> Self { Self { id: 0, path: path.into() } }
pub fn new<P>(path: P) -> Result<Self, Error>
where
P: ToByteStr<'a>,
{
Ok(Self { id: 0, path: path.to_byte_str()? })
}
pub fn len(&self) -> usize { size_of_val(&self.id) + 4 + self.path.len() }
}

View file

@ -2,7 +2,7 @@ use std::borrow::Cow;
use serde::{Deserialize, Serialize};
use crate::{ByteStr, fs::Attrs};
use crate::{ByteStr, Error, ToByteStr, fs::Attrs};
#[derive(Debug, Deserialize, Serialize)]
pub struct SetStat<'a> {
@ -12,8 +12,11 @@ pub struct SetStat<'a> {
}
impl<'a> SetStat<'a> {
pub fn new(path: impl Into<ByteStr<'a>>, attrs: Attrs) -> Self {
Self { id: 0, path: path.into(), attrs }
pub fn new<P>(path: P, attrs: Attrs) -> Result<Self, Error>
where
P: ToByteStr<'a>,
{
Ok(Self { id: 0, path: path.to_byte_str()?, attrs })
}
pub fn len(&self) -> usize { size_of_val(&self.id) + 4 + self.path.len() + self.attrs.len() }

View file

@ -1,6 +1,6 @@
use serde::{Deserialize, Serialize};
use crate::ByteStr;
use crate::{ByteStr, Error, ToByteStr};
#[derive(Debug, Deserialize, Serialize)]
pub struct Stat<'a> {
@ -9,7 +9,12 @@ pub struct Stat<'a> {
}
impl<'a> Stat<'a> {
pub fn new(path: impl Into<ByteStr<'a>>) -> Self { Self { id: 0, path: path.into() } }
pub fn new<P>(path: P) -> Result<Self, Error>
where
P: ToByteStr<'a>,
{
Ok(Self { id: 0, path: path.to_byte_str()? })
}
pub fn len(&self) -> usize { size_of_val(&self.id) + 4 + self.path.len() }
}

View file

@ -1,6 +1,6 @@
use serde::{Deserialize, Serialize};
use crate::ByteStr;
use crate::{ByteStr, Error, ToByteStr};
#[derive(Debug, Deserialize, Serialize)]
pub struct Symlink<'a> {
@ -10,12 +10,12 @@ pub struct Symlink<'a> {
}
impl<'a> Symlink<'a> {
pub fn new<L, O>(link: L, original: O) -> Self
pub fn new<L, O>(link: L, original: O) -> Result<Self, Error>
where
L: Into<ByteStr<'a>>,
O: Into<ByteStr<'a>>,
L: ToByteStr<'a>,
O: ToByteStr<'a>,
{
Self { id: 0, link: link.into(), original: original.into() }
Ok(Self { id: 0, link: link.to_byte_str()?, original: original.to_byte_str()? })
}
pub fn len(&self) -> usize {

87
yazi-sftp/src/wtf.rs Normal file
View file

@ -0,0 +1,87 @@
use std::{borrow::Cow, ffi::{OsStr, OsString}, os::windows::ffi::{OsStrExt, OsStringExt}};
/// Decodes path bytes received over SFTP into a Windows `OsStr`.
///
/// Input that is entirely valid UTF-8 (including the empty slice) is borrowed
/// as-is. Otherwise the valid UTF-8 runs are re-encoded as UTF-16 and every
/// byte of an invalid sequence is mapped to the lone low surrogate
/// `0xDC00 + byte`, so that [`wide_to_bytes`] can restore the original bytes.
///
/// A `Cow::Borrowed` result therefore implies that `bytes` is valid UTF-8.
pub(super) fn bytes_to_wide(bytes: &[u8]) -> Cow<'_, OsStr> {
    // Fast path: nothing to escape, so no allocation is needed.
    if let Ok(valid) = str::from_utf8(bytes) {
        return OsStr::new(valid).into();
    }

    // Every input byte produces at most one UTF-16 unit, so `bytes.len()` is
    // an upper bound on the output length.
    let mut wide: Vec<u16> = Vec::with_capacity(bytes.len());
    for chunk in bytes.utf8_chunks() {
        wide.extend(chunk.valid().encode_utf16());
        // Escape each offending byte individually as a lone low surrogate.
        wide.extend(chunk.invalid().iter().map(|&b| 0xdc00 + u16::from(b)));
    }
    OsString::from_wide(&wide).into()
}
/// Encodes a Windows `OsStr` into the bytes sent over SFTP; the inverse of
/// [`bytes_to_wide`].
///
/// Valid Unicode is borrowed as UTF-8. Otherwise the string is walked unit by
/// unit: surrogate pairs and ordinary units are encoded as UTF-8, and a lone
/// low surrogate in `0xDC80..=0xDCFF` is turned back into the raw byte it
/// escapes.
///
/// Returns `None` for any other unpaired surrogate. That includes
/// `0xDC00..=0xDC7F`: `bytes_to_wide` never produces those (ASCII bytes are
/// always valid UTF-8), and accepting them would let a lone surrogate become
/// an arbitrary ASCII byte such as `/` or NUL in the remote path.
pub(super) fn wide_to_bytes(wide: &OsStr) -> Option<Cow<'_, [u8]>> {
    if let Some(s) = wide.to_str() {
        return Some(s.as_bytes().into());
    }

    let mut units = wide.encode_wide();
    let mut out = Vec::with_capacity(wide.len());
    let mut utf8 = [0u8; 4];
    // `while let` rather than `for`: the pair branch pulls a second unit.
    while let Some(unit) = units.next() {
        match unit {
            // Escaped non-UTF-8 byte: only 0x80..=0xFF can be escaped.
            0xdc80..=0xdcff => out.push((unit - 0xdc00) as u8),
            // High surrogate: must be followed by a low surrogate.
            0xd800..=0xdbff => {
                let low = units.next().filter(|x| (0xdc00..=0xdfff).contains(x))?;
                let scalar =
                    0x10000 + (((u32::from(unit) - 0xd800) << 10) | (u32::from(low) - 0xdc00));
                let c = char::from_u32(scalar)?;
                out.extend_from_slice(c.encode_utf8(&mut utf8).as_bytes());
            }
            // Any remaining lone low surrogate has no byte representation.
            0xdc00..=0xdfff => return None,
            _ => {
                let c = char::from_u32(u32::from(unit))?;
                out.extend_from_slice(c.encode_utf8(&mut utf8).as_bytes());
            }
        }
    }
    Some(out.into())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Arbitrary bytes must survive a bytes -> wide -> bytes round trip.
    #[test]
    fn wtf8_roundtrip() {
        let input: &[u8] = &[
            b'\0', // NUL
            0xff, // a byte that can never appear in UTF-8
            b'a', b'b', b'c', // plain ASCII
            0xf0, 0x9f, 0x98, 0x80, // 😀 (four-byte sequence)
            0xc3, 0x28, // lead byte followed by a non-continuation byte
        ];
        let wide = bytes_to_wide(input);
        let restored = wide_to_bytes(&wide).unwrap();
        assert_eq!(restored.as_ref(), input);
    }

    /// A non-UTF-8 byte is represented as the low surrogate `0xDC00 + byte`.
    #[test]
    #[cfg(windows)]
    fn low_surrogates_for_non_utf8() {
        use std::os::windows::ffi::OsStrExt;

        let units: Vec<u16> = bytes_to_wide(b"\xFF").encode_wide().collect();
        assert_eq!(units, vec![0xdcff_u16]);
    }
}