diff options
| author | Mistivia <i@mistivia.com> | 2025-11-02 15:27:18 +0800 |
|---|---|---|
| committer | Mistivia <i@mistivia.com> | 2025-11-02 15:27:18 +0800 |
| commit | e9c24f4af7ed56760f6db7941827d09f6db9020b (patch) | |
| tree | 62128c43b883ce5e3148113350978755779bb5de /teleirc/matterbridge/vendor/github.com/saintfish/chardet/recognizer.go | |
| parent | 58d5e7cfda4781d8a57ec52aefd02983835c301a (diff) | |
add matterbridge
Diffstat (limited to 'teleirc/matterbridge/vendor/github.com/saintfish/chardet/recognizer.go')
| -rw-r--r-- | teleirc/matterbridge/vendor/github.com/saintfish/chardet/recognizer.go | 83 |
1 files changed, 83 insertions, 0 deletions
diff --git a/teleirc/matterbridge/vendor/github.com/saintfish/chardet/recognizer.go b/teleirc/matterbridge/vendor/github.com/saintfish/chardet/recognizer.go new file mode 100644 index 0000000..1bf8461 --- /dev/null +++ b/teleirc/matterbridge/vendor/github.com/saintfish/chardet/recognizer.go @@ -0,0 +1,83 @@ +package chardet + +type recognizer interface { + Match(*recognizerInput) recognizerOutput +} + +type recognizerOutput Result + +type recognizerInput struct { + raw []byte + input []byte + tagStripped bool + byteStats []int + hasC1Bytes bool +} + +func newRecognizerInput(raw []byte, stripTag bool) *recognizerInput { + input, stripped := mayStripInput(raw, stripTag) + byteStats := computeByteStats(input) + return &recognizerInput{ + raw: raw, + input: input, + tagStripped: stripped, + byteStats: byteStats, + hasC1Bytes: computeHasC1Bytes(byteStats), + } +} + +func mayStripInput(raw []byte, stripTag bool) (out []byte, stripped bool) { + const inputBufferSize = 8192 + out = make([]byte, 0, inputBufferSize) + var badTags, openTags int32 + var inMarkup bool = false + stripped = false + if stripTag { + stripped = true + for _, c := range raw { + if c == '<' { + if inMarkup { + badTags += 1 + } + inMarkup = true + openTags += 1 + } + if !inMarkup { + out = append(out, c) + if len(out) >= inputBufferSize { + break + } + } + if c == '>' { + inMarkup = false + } + } + } + if openTags < 5 || openTags/5 < badTags || (len(out) < 100 && len(raw) > 600) { + limit := len(raw) + if limit > inputBufferSize { + limit = inputBufferSize + } + out = make([]byte, limit) + copy(out, raw[:limit]) + stripped = false + } + return +} + +func computeByteStats(input []byte) []int { + r := make([]int, 256) + for _, c := range input { + r[c] += 1 + } + return r +} + +func computeHasC1Bytes(byteStats []int) bool { + for _, count := range byteStats[0x80 : 0x9F+1] { + if count > 0 { + return true + } + } + return false +} |
