You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
209 lines
4.7 KiB
209 lines
4.7 KiB
package main
|
|
|
|
import (
|
|
"path/filepath"
|
|
"os"
|
|
"database/sql"
|
|
"fmt"
|
|
"flag"
|
|
"io/ioutil"
|
|
"math/rand"
|
|
"log"
|
|
"strings"
|
|
|
|
_ "github.com/mattn/go-sqlite3"
|
|
)
|
|
|
|
// Program metadata and the built-in word tables.
var (
	// version is the release string printed by usage.
	version = "0.0.1"

	// topEnglishWords holds the high-frequency English words that
	// insertCommonEnglishWords writes to the database. main may shuffle
	// this slice in place before it is used.
	topEnglishWords = []string{
		"the", "and", "of", "to", "a", "in", "is", "it",
		"you", "that", "he", "was", "for", "on", "are", "with",
		"as", "I", "his", "they", "be", "at", "one", "have",
		"this", "from", "or", "had", "by", "hot", "word", "but",
		"what", "some", "we", "can", "out", "other", "were", "all",
		"there", "when", "up", "use", "your", "how", "said", "an",
		"each", "she", "which", "do", "their", "time", "if", "will",
		"way", "about", "many", "then", "them", "write", "would", "like",
		"so", "these", "here", "me", "make", "where", "give", "now",
		"just",
	}

	// commonSymbols holds the punctuation and symbol tokens that
	// insertSymbols writes to the database. main may shuffle this slice
	// in place before it is used.
	commonSymbols = []string{
		".", ",", "!", ";",
		"?", "(", ")", "'",
		"\"", "...", ":", "*",
		"$", "@", "#", "%",
	}
)
|
|
|
|
func insertSymbols(db *sql.DB) {
|
|
// Insert symbols into the database
|
|
tx, err := db.Begin()
|
|
if err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
|
|
stmt, err := tx.Prepare("INSERT INTO words(word) VALUES(?)")
|
|
if err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
defer stmt.Close()
|
|
|
|
for _, word := range commonSymbols {
|
|
_, err = stmt.Exec(word)
|
|
if err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
}
|
|
|
|
// Commit the transaction
|
|
tx.Commit()
|
|
|
|
fmt.Println("Common symbols inserted into the database.")
|
|
}
|
|
|
|
func insertCommonEnglishWords(db *sql.DB) {
|
|
// Insert top English words into the database
|
|
tx, err := db.Begin()
|
|
if err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
|
|
stmt, err := tx.Prepare("INSERT INTO words(word) VALUES(?)")
|
|
if err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
defer stmt.Close()
|
|
|
|
for _, word := range topEnglishWords {
|
|
_, err = stmt.Exec(word)
|
|
if err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
}
|
|
|
|
// Commit the transaction
|
|
tx.Commit()
|
|
|
|
fmt.Println("Top English words inserted into the database.")
|
|
}
|
|
|
|
// insertEnglishWords loads the dictionary file named by *dictFilePtr and
// writes each whitespace-separated token to the words table, in file order.
//
// The whole file is read into memory, split with strings.Fields, and
// inserted through one prepared statement inside a single transaction.
// Any I/O or database error, including a failed commit, terminates the
// program via log.Fatal; the confirmation line is printed only after the
// commit has succeeded.
func insertEnglishWords(db *sql.DB, dictFilePtr *string) {
	content, err := ioutil.ReadFile(*dictFilePtr)
	if err != nil {
		log.Fatal(err)
	}

	// Fields splits on any run of whitespace, so blank lines and
	// trailing newlines never produce empty words.
	words := strings.Fields(string(content))

	tx, err := db.Begin()
	if err != nil {
		log.Fatal(err)
	}

	stmt, err := tx.Prepare("INSERT INTO words(word) VALUES(?)")
	if err != nil {
		log.Fatal(err)
	}
	defer stmt.Close()

	for _, word := range words {
		if _, err := stmt.Exec(word); err != nil {
			log.Fatal(err)
		}
	}

	// Commit can fail (e.g. locked database or full disk); reporting
	// success without checking it would hide lost data.
	if err := tx.Commit(); err != nil {
		log.Fatal(err)
	}

	fmt.Println("American-English words inserted into the database.")
}
|
|
|
|
func main() {
|
|
help := flag.Bool("help", false, "print a short usage message")
|
|
dbPtr := flag.String("db", "./english_words.db", "SQLite3 DB file to use")
|
|
dictFilePtr := flag.String("dict", "./rnd-english", "Dictionary file to use")
|
|
bDbUpdatePtr := flag.Bool("db-update", false, "Only update the database")
|
|
bSkipShufflePtr := flag.Bool("skip-shuffle", false, "Skip shuffle")
|
|
bCompressionPtr := flag.Bool("compression", false, "Use High Freq Words to compress")
|
|
bSymbolsLastPtr := flag.Bool("symbols-last", false, "Use symbols last")
|
|
flag.Parse()
|
|
|
|
if *help {
|
|
usage()
|
|
return
|
|
}
|
|
|
|
// Open SQLite database file
|
|
db, err := sql.Open("sqlite3", *dbPtr)
|
|
if err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
defer db.Close()
|
|
|
|
// Create table
|
|
_, err = db.Exec(`CREATE TABLE IF NOT EXISTS words (id INTEGER PRIMARY KEY, word TEXT);`)
|
|
if err != nil {
|
|
log.Fatal(err)
|
|
}
|
|
|
|
if *bDbUpdatePtr {
|
|
insertEnglishWords(db, dictFilePtr)
|
|
return
|
|
}
|
|
|
|
if ! *bSkipShufflePtr {
|
|
fmt.Println("Shuffling.....")
|
|
// Randomize high Freq. words
|
|
shuffleArray(topEnglishWords)
|
|
shuffleArray(commonSymbols)
|
|
}
|
|
|
|
if ! *bSymbolsLastPtr {
|
|
insertSymbols(db)
|
|
}
|
|
|
|
if *bCompressionPtr {
|
|
insertCommonEnglishWords(db)
|
|
}
|
|
|
|
insertEnglishWords(db, dictFilePtr)
|
|
|
|
if *bSymbolsLastPtr {
|
|
insertSymbols(db)
|
|
}
|
|
|
|
}
|
|
|
|
// shuffleArray randomly permutes arr in place.
//
// It delegates to rand.Shuffle, the standard library's Fisher-Yates
// implementation, using the package-level math/rand source. Empty and
// single-element slices are left untouched.
//
// NOTE(review): math/rand is not cryptographically secure; if the
// resulting order must be unpredictable, a crypto/rand-backed source
// should be considered.
func shuffleArray(arr []string) {
	rand.Shuffle(len(arr), func(i, j int) {
		arr[i], arr[j] = arr[j], arr[i]
	})
}
|
|
|
|
func usage() {
|
|
progName := filepath.Base(os.Args[0])
|
|
fmt.Printf(`%s version %s, (c) 2024 Bob
|
|
PS: Don't forget to shuffle the Dict file $ shuf /usr/share/dict/american-english > rnd-english
|
|
Usage:
|
|
%s [-help] [-db English.db] [-dict rnd-english] [-shuffle] [-compression] [-later]
|
|
|
|
-help Print this help message.
|
|
|
|
-db SQLite3 Database to save to
|
|
|
|
-dict Dictionary file to use
|
|
|
|
-db-update Only update the database, skip adding high-freq words and symbols
|
|
|
|
-skip-shuffle Skip shuffling High-Freq. words around
|
|
|
|
-compression Compress using high Frequency Words List, less secure
|
|
|
|
-symbols-last Use symbols last, gives away size of dict!
|
|
`, progName, version, progName)
|
|
}
|
|
|