Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 32 additions & 7 deletions lib/wire/wsmsg.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package wire

import (
"bytes"
"encoding/json"
"html"
"strconv"
"strings"
Expand Down Expand Up @@ -109,11 +110,13 @@ func (m *WsMsg) Format() string {
// Parse parses an incoming text buffer into a message.
//
// The wire format mirrors [WsMsg.Append]. For commands other than [what.Set] and
// [what.Call], if the Data field begins with a double quote it is decoded with
// [strconv.Unquote] and the message is rejected if that fails; data that does not
// begin with a double quote is taken verbatim. Set and Call data is always taken
// verbatim. In all cases the resulting data is sanitized with
// [strings.ToValidUTF8].
// [what.Call], if the Data field begins with a double quote it is decoded as a JSON
// string: [strconv.Unquote] handles the common case, with a fallback to a JSON
// string decode for inputs it rejects but the browser's JSON.stringify can produce
// (notably a lone UTF-16 surrogate, which the fallback maps to U+FFFD). The message
// is rejected only if both decoders fail. Data that does not begin with a double
// quote is taken verbatim, as is all Set and Call data. In all cases the resulting
// data is sanitized with [strings.ToValidUTF8].
func Parse(txt []byte) (WsMsg, bool) {
if len(txt) > 2 && txt[len(txt)-1] == '\n' {
if nl1 := bytes.IndexByte(txt, '\t'); nl1 >= 0 {
Expand All @@ -125,8 +128,21 @@ func Parse(txt []byte) (WsMsg, bool) {
if id := jid.ParseString(string(txt[nl1+1 : nl2])); id.IsValid() {
data := string(txt[nl2+1 : len(txt)-1])
if txt[nl2+1] == '"' && wht != what.Set && wht != what.Call {
var err error
if data, err = strconv.Unquote(data); err != nil {
// The browser encodes this data with JSON.stringify.
// strconv.Unquote decodes the common case cheaply and
// allocation-free, but its grammar is not a superset of
// JSON: it rejects the "\udXXX" lone-surrogate escapes
// JSON.stringify can emit. Fall back to a JSON string
// decode (which maps a lone surrogate to U+FFFD) so a
// legitimate event is decoded rather than silently dropped;
// the ToValidUTF8 below still sanitizes whatever survives.
// The fallback lives in jsonUnquoteString so the address it
// takes does not force data to the heap on every call.
if unq, err := strconv.Unquote(data); err == nil {
data = unq
} else if unq, ok := jsonUnquoteString(data); ok {
data = unq
} else {
return WsMsg{}, false
}
}
Expand All @@ -143,6 +159,15 @@ func Parse(txt []byte) (WsMsg, bool) {
return WsMsg{}, false
}

// jsonUnquoteString decodes s as a JSON string literal, returning ok=false if it
// is not one.
//
// s must begin and end with a double quote. This is checked up front because
// json.Unmarshal into a *string is more lenient than "a JSON string literal": it
// accepts the JSON value null (leaving the target empty and reporting no error)
// and tolerates surrounding whitespace. Without the check, inputs such as "null"
// would be reported as successfully decoded empty strings.
//
// On success out holds the decoded text; on failure out is the empty string.
//
// It exists as a separate function so that the address it must take of its decode
// target does not force [Parse]'s data local to escape to the heap on every call;
// see the fallback in Parse.
func jsonUnquoteString(s string) (out string, ok bool) {
	// Cheap structural guard: anything not delimited by quotes cannot be a
	// JSON string literal, whatever json.Unmarshal would make of it.
	if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
		return "", false
	}
	if err := json.Unmarshal([]byte(s), &out); err != nil {
		return "", false
	}
	return out, true
}

// FillAlert replaces m with an escaped danger alert for err.
func (m *WsMsg) FillAlert(err error) {
m.Jid = 0
Expand Down
31 changes: 31 additions & 0 deletions lib/wire/wsmsg_benchmark_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package wire

import "testing"

var parseBenchSink WsMsg

// BenchmarkParse times Parse on a fixed table of inbound frames, running one
// sub-benchmark per frame with allocation reporting switched on. The table covers
// a plain quoted payload, a payload with backslash escapes, a lone-surrogate
// escape (the case that goes through the JSON fallback), an unquoted payload, and
// a Set frame. Each result is stored in parseBenchSink so the call cannot be
// optimized away.
func BenchmarkParse(b *testing.B) {
	type benchCase struct {
		name  string
		frame []byte
	}
	cases := []benchCase{
		{name: "input_plain", frame: []byte("Input\tJid.1\t\"hello world\"\n")},
		{name: "input_escaped", frame: []byte("Input\tJid.1\t\"a\\nb\\tc\"\n")},
		{name: "input_surrogate", frame: []byte("Input\tJid.1\t\"\\ud800\"\n")},
		{name: "input_unquoted", frame: []byte("Input\tJid.1\ttrue\n")},
		{name: "set_verbatim", frame: []byte("Set\tJid.1\tpath={\"a\":1}\n")},
	}
	for idx := range cases {
		bc := cases[idx]
		b.Run(bc.name, func(sub *testing.B) {
			sub.ReportAllocs()
			sub.ResetTimer()
			for remaining := sub.N; remaining > 0; remaining-- {
				parseBenchSink, _ = Parse(bc.frame)
			}
		})
	}
}
23 changes: 23 additions & 0 deletions lib/wire/wsmsg_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,29 @@ func Test_wsParse_IncompleteFails(t *testing.T) {
}
}

// Test_wsParse_InboundLoneSurrogate feeds Parse an Input frame whose quoted data
// is the escape "\ud800", a lone UTF-16 surrogate. strconv.Unquote rejects that
// escape, so a Parse relying on it alone would drop the whole frame. The test
// requires that the frame is accepted, that the header decodes to what.Input and
// Jid 1, that Data is valid UTF-8, and that Data is exactly the replacement
// character.
func Test_wsParse_InboundLoneSurrogate(t *testing.T) {
	const (
		frame       = "Input\tJid.1\t\"\\ud800\"\n" // data field is the 8-byte JSON string "\ud800"
		replacement = "�"
	)

	parsed, accepted := Parse([]byte(frame))
	if !accepted {
		t.Fatalf("Parse dropped a frame with a lone surrogate: %q", frame)
	}

	headerOK := parsed.What == what.Input && parsed.Jid == jid.Jid(1)
	if !headerOK {
		t.Errorf("unexpected header: %+v", parsed)
	}

	if valid := utf8.ValidString(parsed.Data); !valid {
		t.Errorf("Data is not valid UTF-8: %q", parsed.Data)
	}

	if got := parsed.Data; got != replacement {
		t.Errorf("got Data %q, want the replacement char %q", got, replacement)
	}
}

func Fuzz_wsParse(f *testing.F) {
f.Add([]byte("Update\t\t\"\"\n"))
f.Add([]byte("Click\t\t\"10 20 5 name\\tJid.1\"\n"))
Expand Down
Loading