Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correctly convert surrogate UTF-16 to rune on MS-Windows #12

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 8 additions & 21 deletions internal/win32/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,13 @@ func hiword(x uint32) uint16 {
return uint16((x >> 16) & 0xFFFF)
}

func decodeUtf16(s uint16) rune {
const (
// 0xd800-0xdc00 encodes the high 10 bits of a pair.
// 0xdc00-0xe000 encodes the low 10 bits of a pair.
// the value is those 20 bits plus 0x10000.
surr1 = 0xd800
surr3 = 0xe000

// Unicode replacement character
replacementChar = '\uFFFD'
)
// isSurrogatedCharacter reports whether x is a UTF-16 surrogate code unit,
// i.e. whether it lies in the closed range 0xd800-0xdfff. High (leading)
// surrogates occupy 0xd800-0xdbff and low (trailing) surrogates occupy
// 0xdc00-0xdfff.
//
// Values at or above 0xe000 are ordinary code points and must not be
// reported as surrogates; otherwise BMP characters such as private-use or
// fullwidth forms would be mistaken for one half of a pair.
func isSurrogatedCharacter(x rune) bool {
	const (
		surrogateMin = 0xd800 // first high surrogate
		surrogateMax = 0xdfff // last low surrogate
	)
	return surrogateMin <= x && x <= surrogateMax
}

var a rune
switch r := s; {
case r < surr1, surr3 <= r:
// normal rune
a = rune(r)
default:
// invalid surrogate sequence
a = replacementChar
}
return a
// surrogatedUtf16toRune recovers the code point encoded by a UTF-16
// surrogate pair.
//
// high must be a high (leading) surrogate in 0xd800-0xdbff and low must be a
// low (trailing) surrogate in 0xdc00-0xdfff. The result is the 20 bits carried
// by the pair (10 from each half) plus 0x10000. If either argument is outside
// its range, the pair is malformed and the Unicode replacement character
// U+FFFD is returned instead of an arbitrary value.
func surrogatedUtf16toRune(high rune, low rune) rune {
	const (
		highStart = 0xd800  // first high surrogate
		lowStart  = 0xdc00  // first low surrogate; also one past the last high surrogate
		lowEnd    = 0xe000  // one past the last low surrogate
		base      = 0x10000 // first code point representable only as a pair

		// Unicode replacement character, returned for malformed pairs.
		replacementChar = '\uFFFD'
	)

	if high < highStart || high >= lowStart || low < lowStart || low >= lowEnd {
		return replacementChar
	}
	return ((high-highStart)<<10 | (low - lowStart)) + base
}
16 changes: 14 additions & 2 deletions internal/win32/window.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ type Window struct {
cursorCaptureCount int // non-shared
modifiers events.ModifiersState // non-shared

highSurrogated rune // non-shared

// callbacks
resizedCb atomicx.Pointer[events.WindowResizedCallback]
closeRequestedCb atomicx.Pointer[events.WindowCloseRequestedCallback]
Expand Down Expand Up @@ -809,11 +811,21 @@ func windowProc(window, msg, wparam, lparam uintptr) uintptr {
}
}
return 0

case procs.WM_CHAR, procs.WM_SYSCHAR:
// A UTF-16 code unit outside the surrogate range can be used directly as a rune.
ch := rune(wparam)
// A character encoded as a UTF-16 surrogate pair is posted as two consecutive WM_CHAR messages: the high surrogate, then the low surrogate.
if isSurrogatedCharacter(ch) {
if w.highSurrogated == 0 {
w.highSurrogated = ch
return 0
}
ch = surrogatedUtf16toRune(w.highSurrogated, ch)
w.highSurrogated = 0
}
if cb := w.receivedCharacterCb.Load(); cb != nil {
if cb := (*cb); cb != nil {
cb(decodeUtf16(uint16(wparam)))
cb(ch)
}
}
return 0
Expand Down
Loading