|
最近看了阮一峰的一篇介绍使用贝叶斯推断方法做拼写检查的文章,该文章的易懂程度输于 Google 技术总监写的原文及其优秀的译文。
这说明了什么?越是大师级的人写的文章往往越易懂。所以关于贝叶斯方法我就不解释了,只贴代码。
我使用golang对照实现了一遍:
一是为了弄懂其算法细节
二是为了不忘记前段时间看的 golang 语法
就像几年前在学校时候对着C版的数据结构书用C#去实现一样。
package main
import (
"fmt"
"io/ioutil"
"regexp"
)
// NWORDS maps a word to an integer value. The functions known and
// known_edits2 only test it for key membership.
// NOTE(review): presumably populated from the result of train; the assignment
// is not visible in this part of the file — confirm.
var NWORDS map[string]int

// alphabet lists the lowercase ASCII letters that edit1 uses when generating
// replacement and insertion candidates.
const alphabet = "abcdefghijklmnopqrstuvwxyz"
// wordPattern matches maximal runs of lowercase ASCII letters. It is compiled
// once at package initialisation rather than on every call to words, and
// MustCompile makes an invalid pattern fail loudly instead of being ignored.
var wordPattern = regexp.MustCompile("[a-z]+")

// words returns every maximal run of lowercase ASCII letters found in text,
// in order of appearance. Any other character (uppercase letters, digits,
// punctuation, whitespace) acts as a separator, so callers that want
// case-insensitive tokenising must lower-case text beforehand.
// The result is nil when text contains no match.
func words(text string) []string {
	return wordPattern.FindAllString(text, -1)
}
// train counts how many times each string occurs in features.
//
// It returns a non-nil map from feature to occurrence count; the map is empty
// when features is empty or nil.
func train(features []string) map[string]int {
	result := make(map[string]int)
	// Range over the values: the map is keyed by the individual word, not by
	// the slice. A missing key reads as zero, so a plain increment covers both
	// the first and every later occurrence.
	for _, feature := range features {
		result[feature]++
	}
	return result
}
// edit1 returns the distinct strings that are one edit away from word:
// every single-position deletion, adjacent transposition, single-letter
// replacement and single-letter insertion, using the letters of alphabet.
//
// Positions are byte offsets, so the edits are only meaningful for
// single-byte (ASCII) input. Candidates are generated in the order
// deletions, transpositions, replacements, insertions and then passed
// through set to drop duplicates.
func edit1(word string) []string {
	n := len(word)
	var edits []string

	// Deletions: drop the byte at offset i.
	for i := 0; i < n; i++ {
		edits = append(edits, word[:i]+word[i+1:])
	}

	// Transpositions: swap the bytes at offsets i and i+1.
	for i := 0; i+1 < n; i++ {
		edits = append(edits, word[:i]+string(word[i+1])+string(word[i])+word[i+2:])
	}

	// Replacements: overwrite the byte at offset i with each letter.
	for _, letter := range alphabet {
		for i := 0; i < n; i++ {
			edits = append(edits, word[:i]+string(letter)+word[i+1:])
		}
	}

	// Insertions: place each letter at every offset, including the end.
	for _, letter := range alphabet {
		for i := 0; i <= n; i++ {
			edits = append(edits, word[:i]+string(letter)+word[i:])
		}
	}

	return set(edits)
}
// known_edits2 returns the distinct strings that are two edits away from word
// (edit1 applied to each result of edit1) and that are keys of NWORDS.
//
// Results keep the order in which they are first produced. The result is nil
// when no such string exists. Duplicates are filtered while iterating, so the
// very large stream of distance-2 candidates is never buffered or rescanned.
func known_edits2(word string) []string {
	var result []string
	seen := make(map[string]struct{})
	for _, e1 := range edit1(word) {
		for _, e2 := range edit1(e1) {
			if _, ok := NWORDS[e2]; !ok {
				continue
			}
			if _, dup := seen[e2]; dup {
				continue
			}
			seen[e2] = struct{}{}
			result = append(result, e2)
		}
	}
	return result
}
// known filters words down to the entries that are keys of NWORDS, keeping
// their original order (duplicates in the input are kept as well).
// The result is nil when none of the words is present.
func known(words []string) []string {
	var found []string
	for idx := range words {
		candidate := words[idx]
		_, present := NWORDS[candidate]
		if !present {
			continue
		}
		found = append(found, candidate)
	}
	return found
}
// appendIfMissing returns slice unchanged when it already contains i;
// otherwise it returns slice with i appended at the end.
func appendIfMissing(slice []string, i string) []string {
	present := false
	for idx := 0; idx < len(slice) && !present; idx++ {
		present = slice[idx] == i
	}
	if present {
		return slice
	}
	return append(slice, i)
}
// set returns the distinct elements of arr in order of first occurrence.
// The result is nil when arr is empty or nil.
//
// Membership is tracked in a map, so the cost is linear in len(arr) rather
// than a linear scan of the result for every element.
func set(arr []string) []string {
	var result []string
	seen := make(map[string]struct{}, len(arr))
	for _, ele := range arr {
		if _, dup := seen[ele]; dup {
			continue
		}
		seen[ele] = struct{}{}
		result = append(result, ele)
	}
	return result
}
func correct(word string) string {
candidates := known([]string{word})
if len(candidates) |
|
|