Assign numbers to words.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
xorWords/create_db.go

209 lines
4.7 KiB

package main
import (
"path/filepath"
"os"
"database/sql"
"fmt"
"flag"
"io/ioutil"
"math/rand"
"log"
"strings"
_ "github.com/mattn/go-sqlite3"
)
// Package-level data shared by the insert helpers and main.
var (
	// version is the program version printed by usage.
	version = "0.0.1"

	// topEnglishWords is the built-in list of high-frequency words that
	// insertCommonEnglishWords writes to the database. main shuffles it in
	// place unless -skip-shuffle is given, so the order here is only the
	// starting order.
	topEnglishWords = []string{
		"the", "and", "of", "to", "a", "in", "is", "it",
		"you", "that", "he", "was", "for", "on", "are", "with",
		"as", "I", "his", "they", "be", "at", "one", "have",
		"this", "from", "or", "had", "by", "hot", "word", "but",
		"what", "some", "we", "can", "out", "other", "were", "all",
		"there", "when", "up", "use", "your", "how", "said", "an",
		"each", "she", "which", "do", "their", "time", "if", "will",
		"way", "about", "many", "then", "them", "write", "would", "like",
		"so", "these", "here", "me", "make", "where", "give", "now",
		"just",
	}

	// commonSymbols is the built-in list of punctuation and symbol tokens
	// that insertSymbols writes to the database. Like topEnglishWords, it is
	// shuffled in place by main unless -skip-shuffle is given.
	commonSymbols = []string{
		".", ",", "!", ";",
		"?", "(", ")", "'",
		"\"", "...", ":", "*",
		"$", "@", "#", "%",
	}
)
// insertWords writes every entry of words as one row of the words table.
//
// The whole batch runs inside a single transaction with one prepared
// statement. Any failure (begin, prepare, exec, or commit) is returned to the
// caller and the transaction is rolled back, so a failed batch leaves no
// partial rows behind.
func insertWords(db *sql.DB, words []string) error {
	tx, err := db.Begin()
	if err != nil {
		return fmt.Errorf("beginning transaction: %w", err)
	}
	// Once Commit has succeeded, Rollback only reports that the transaction
	// is already done, so deferring it unconditionally is safe.
	defer tx.Rollback()

	stmt, err := tx.Prepare("INSERT INTO words(word) VALUES(?)")
	if err != nil {
		return fmt.Errorf("preparing insert: %w", err)
	}
	defer stmt.Close()

	for _, word := range words {
		if _, err := stmt.Exec(word); err != nil {
			return fmt.Errorf("inserting %q: %w", word, err)
		}
	}

	if err := tx.Commit(); err != nil {
		return fmt.Errorf("committing transaction: %w", err)
	}
	return nil
}

// insertSymbols inserts every entry of commonSymbols into the words table, in
// the slice's current order, and prints a confirmation line on success.
// Any database error terminates the program via log.Fatal.
func insertSymbols(db *sql.DB) {
	if err := insertWords(db, commonSymbols); err != nil {
		log.Fatal(err)
	}
	fmt.Println("Common symbols inserted into the database.")
}

// insertCommonEnglishWords inserts every entry of topEnglishWords into the
// words table, in the slice's current order, and prints a confirmation line
// on success. Any database error terminates the program via log.Fatal.
func insertCommonEnglishWords(db *sql.DB) {
	if err := insertWords(db, topEnglishWords); err != nil {
		log.Fatal(err)
	}
	fmt.Println("Top English words inserted into the database.")
}

// insertEnglishWords reads the dictionary file named by *dictFilePtr, splits
// its contents on whitespace, and inserts each resulting token into the words
// table in file order. It prints a confirmation line on success; a read or
// database error terminates the program via log.Fatal.
func insertEnglishWords(db *sql.DB, dictFilePtr *string) {
	// The whole file is loaded into memory before splitting.
	content, err := ioutil.ReadFile(*dictFilePtr)
	if err != nil {
		log.Fatal(err)
	}

	words := strings.Fields(string(content))
	if err := insertWords(db, words); err != nil {
		log.Fatal(err)
	}
	fmt.Println("American-English words inserted into the database.")
}
// main parses the command-line flags, opens (or creates) the SQLite database,
// makes sure the words table exists, and then fills it.
//
// With -db-update only the dictionary file is inserted. Otherwise the built-in
// lists are shuffled (unless -skip-shuffle), and rows are inserted in this
// order: symbols (unless -symbols-last), high-frequency words (only with
// -compression), the dictionary file, and finally symbols when -symbols-last
// was given.
func main() {
	var (
		showHelp    = flag.Bool("help", false, "print a short usage message")
		dbPath      = flag.String("db", "./english_words.db", "SQLite3 DB file to use")
		dictPath    = flag.String("dict", "./rnd-english", "Dictionary file to use")
		updateOnly  = flag.Bool("db-update", false, "Only update the database")
		skipShuffle = flag.Bool("skip-shuffle", false, "Skip shuffle")
		compress    = flag.Bool("compression", false, "Use High Freq Words to compress")
		symbolsLast = flag.Bool("symbols-last", false, "Use symbols last")
	)
	flag.Parse()

	if *showHelp {
		usage()
		return
	}

	// Open the SQLite database file.
	db, err := sql.Open("sqlite3", *dbPath)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Ensure the target table exists before any insert runs.
	const createTable = `CREATE TABLE IF NOT EXISTS words (id INTEGER PRIMARY KEY, word TEXT);`
	if _, err := db.Exec(createTable); err != nil {
		log.Fatal(err)
	}

	// Update mode: add the dictionary words and nothing else.
	if *updateOnly {
		insertEnglishWords(db, dictPath)
		return
	}

	if !*skipShuffle {
		fmt.Println("Shuffling.....")
		// Randomize the order of the built-in lists before they are stored.
		shuffleArray(topEnglishWords)
		shuffleArray(commonSymbols)
	}

	symbolsFirst := !*symbolsLast
	if symbolsFirst {
		insertSymbols(db)
	}
	if *compress {
		insertCommonEnglishWords(db)
	}
	insertEnglishWords(db, dictPath)
	if !symbolsFirst {
		insertSymbols(db)
	}
}
// shuffleArray randomly permutes arr in place. Nil, empty, and single-element
// slices are left unchanged.
//
// Randomness comes from math/rand's package-level source.
// NOTE(review): math/rand is not a cryptographically secure generator, and
// with Go toolchains older than 1.20 its package-level source produces the
// same sequence on every run unless it is seeded — confirm this is acceptable
// for how the shuffled order is used.
func shuffleArray(arr []string) {
	rand.Shuffle(len(arr), func(i, j int) {
		arr[i], arr[j] = arr[j], arr[i]
	})
}
// usage prints the program name, version, and a summary of the supported
// command-line flags to standard output. The program name is taken from the
// base name of os.Args[0].
func usage() {
	progName := filepath.Base(os.Args[0])
	// The synopsis line lists exactly the flags described below it.
	fmt.Printf(`%s version %s, (c) 2024 Bob
PS: Don't forget to shuffle the Dict file $ shuf /usr/share/dict/american-english > rnd-english
Usage:
%s [-help] [-db English.db] [-dict rnd-english] [-db-update] [-skip-shuffle] [-compression] [-symbols-last]
-help Print this help message.
-db SQLite3 Database to save to
-dict Dictionary file to use
-db-update Only update the database, skip adding high-freq words and symbols
-skip-shuffle Skip shuffling High-Freq. words around
-compression Compress using high Frequency Words List, less secure
-symbols-last Use symbols last, gives away size of dict!
`, progName, version, progName)
}