From fa08c95ed2b07b07b34cda6a2c50d251b3b70508 Mon Sep 17 00:00:00 2001 From: Steven Polley Date: Tue, 6 Aug 2024 18:01:23 -0600 Subject: [PATCH] split jobs into several batch messages to stay within TTS API limitations for max tokens --- go.mod | 6 +- go.sum | 4 ++ main.go | 166 +++++++++++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 154 insertions(+), 22 deletions(-) diff --git a/go.mod b/go.mod index cdf5898..1b9ae21 100644 --- a/go.mod +++ b/go.mod @@ -2,7 +2,10 @@ module deadbeef.codes/steven/tts go 1.22.0 -require cloud.google.com/go/texttospeech v1.7.11 +require ( + cloud.google.com/go/texttospeech v1.7.11 + github.com/hyacinthus/mp3join v0.0.0-20190710105654-d46eaeeb9552 +) require ( cloud.google.com/go v0.115.0 // indirect @@ -10,6 +13,7 @@ require ( cloud.google.com/go/auth/oauth2adapt v0.2.3 // indirect cloud.google.com/go/compute/metadata v0.5.0 // indirect cloud.google.com/go/longrunning v0.5.11 // indirect + github.com/dmulholland/mp3lib v0.0.0-20190407131416-50ad4bfbe332 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect diff --git a/go.sum b/go.sum index 0f53342..03b4b86 100644 --- a/go.sum +++ b/go.sum @@ -18,6 +18,8 @@ github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGX github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dmulholland/mp3lib v0.0.0-20190407131416-50ad4bfbe332 h1:zh+x3xrRRobJ+O6Jy+u+8+TSj7qzuW4EL8Hkf5cbAck= +github.com/dmulholland/mp3lib v0.0.0-20190407131416-50ad4bfbe332/go.mod h1:U3TgSK0lA/gbTgENpBTSNn/OmowG1hr07mKQqqvbLxE= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= @@ -60,6 +62,8 @@ github.com/googleapis/enterprise-certificate-proxy v0.3.2 h1:Vie5ybvEvT75RniqhfF github.com/googleapis/enterprise-certificate-proxy v0.3.2/go.mod h1:VLSiSSBs/ksPL8kq3OBOQ6WRI2QnaFynd1DCjZ62+V0= github.com/googleapis/gax-go/v2 v2.13.0 h1:yitjD5f7jQHhyDsnhKEBU52NdvvdSeGzlAnDPT0hH1s= github.com/googleapis/gax-go/v2 v2.13.0/go.mod h1:Z/fvTZXF8/uw7Xu5GuslPw+bplx6SS338j1Is2S+B7A= +github.com/hyacinthus/mp3join v0.0.0-20190710105654-d46eaeeb9552 h1:cjR5hraUrLrNBQ6lXsjd/VDtJf7+3TOow++DaTAj8r8= +github.com/hyacinthus/mp3join v0.0.0-20190710105654-d46eaeeb9552/go.mod h1:eQzsT6lJmJ/wcTqoHaHfuadmI1lzaHjrdDLO4qKiqcI= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= diff --git a/main.go b/main.go index a4d5ee4..cf99ee9 100644 --- a/main.go +++ b/main.go @@ -1,6 +1,7 @@ package main import ( + "bytes" "context" "crypto/sha256" "encoding/hex" @@ -15,6 +16,7 @@ import ( texttospeech "cloud.google.com/go/texttospeech/apiv1" "cloud.google.com/go/texttospeech/apiv1/texttospeechpb" + "github.com/hyacinthus/mp3join" ) type Job struct { @@ -28,11 +30,11 @@ func loadJobs(directory string) ([]Job, error) { jobs := make([]Job, 0) err := filepath.Walk(directory, func(path string, info os.FileInfo, err error) error { if !info.IsDir() && strings.HasSuffix(path, ".json") { - f, err := os.Open(fmt.Sprintf("%s", path)) - defer f.Close() + f, err := os.Open(path) if err != nil { return fmt.Errorf("failed to open file '%s': %v", path, err) } + defer f.Close() b, err := io.ReadAll(f) if err != nil { @@ -75,27 +77,109 @@ func main() { // Check each message if MP3 file already exists, if not then it will synthesize and save the audio to file // file names are SHA256 sums of the spoken text for _, job := range jobs { - h := sha256.New() - h.Write([]byte(job.Message)) - sha := h.Sum(nil) // "sha" is uint8 type, encoded in base16 - - filename := fmt.Sprintf("%s-%s.mp3", job.Name, hex.EncodeToString(sha)) - - if _, err := os.Stat(fmt.Sprintf("%s", filename)); errors.Is(err, os.ErrNotExist) { - mp3, err := getTTS(job.Voice, job.Message, job.Language) - if err != nil { - log.Printf("failed to get TTS: %v", err) - continue - } - - err = os.WriteFile(fmt.Sprintf("%s", filename), mp3, 0644) - if err != nil { - log.Printf("failed to write mp3 file: %v", err) - continue - } - log.Printf("Audio content written to file: %v\n", filename) + err = processJob(job) + if err != nil { + log.Printf("job '%s' failed to process: %v", job.Name, err) } } + + err = cleanCache(jobs) + if err != nil { + log.Printf("failed to clean cache: %v", err) + } +} + +// Splits a job +func processJob(job Job) error { + messages, err := splitJob(job.Message, 12) + if err != nil { + return fmt.Errorf("failed to split job '%s': %v", job.Name, err) + } + + // Process + + var mp3Files [][]byte + + for i, message := range messages { + + h := sha256.New() + h.Write([]byte(message)) + sha := h.Sum(nil) // "sha" is uint8 type, encoded in base16 + + filename := fmt.Sprintf("jobs/cache/%s-%d-%s.mp3", job.Name, i, hex.EncodeToString(sha)) + var mp3 []byte + + if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) { + mp3, err = getTTS(job.Voice, message, job.Language) + if err != nil { + return fmt.Errorf("failed to get TTS: %v", err) + } + + err = os.WriteFile(filename, mp3, 0644) + if err != nil { + return fmt.Errorf("failed to write mp3 file '%s': %v", filename, err) + } + log.Printf("Audio content written to file: %v\n", filename) + } else { + mp3, err = os.ReadFile(filename) + if err != nil { + return fmt.Errorf("failed to read mp3 file '%s': %v", filename, err) + } + } + mp3Files = append(mp3Files, mp3) + } + + mp3Joiner := mp3join.New() + + for i, mp3 := range mp3Files { + r := bytes.NewReader(mp3) + err := mp3Joiner.Append(r) + if err != nil { + return fmt.Errorf("failed to join mp3 file '%d': %v", i, err) + } + } + + dest := mp3Joiner.Reader() + + combinedBytes, err := io.ReadAll(dest) + if err != nil { + return fmt.Errorf("failed to read combined bytes: %v", err) + } + + f, err := os.Create(fmt.Sprintf("jobs/%s.mp3", job.Name)) + if err != nil { + return fmt.Errorf("failed to create joined mp3 file: %v", err) + } + defer f.Close() + + _, err = f.Write(combinedBytes) + if err != nil { + return fmt.Errorf("failed to write combined bytes to file: %v", err) + } + + return nil +} + +// Splits a job.Message by maxSentences - to get around API limitations on max tokens +func splitJob(jobMessage string, maxSentences int) ([]string, error) { + sentences := strings.Split(jobMessage, ". ") + + messages := make([]string, 0) + var message string + for i, sentence := range sentences { + if i%maxSentences == 0 { + if len(message) > 0 { + messages = append(messages, message) + } + message = "" + } + message = fmt.Sprintf("%s %s.", message, sentence) + } + if len(message) > 0 { + messages = append(messages, message) + } + + return messages, nil } // Accepts a string and returns a byteslice of the message in mp3 format, and an error @@ -176,3 +260,43 @@ func ListVoices() error { return nil } + +func cleanCache(jobs []Job) error { + cacheFiles, err := os.ReadDir("jobs/cache") + if err != nil { + return fmt.Errorf("failed to read jobs/cache directory: %v", err) + } + + for _, file := range cacheFiles { + if file.IsDir() { + continue + } + if !strings.HasSuffix(file.Name(), ".mp3") { + continue + } + + splitName := strings.Split(file.Name(), "-") + if len(splitName < 1) { + continue + } + + // First, check if this file even has an active job referencing it, if not then delete + var foundActiveJob bool + for _, job := range jobs { + if job.Name == splitName[0] { + foundActiveJob = true + break + } + } + if !foundActiveJob { + err = os.Remove(fmt.Sprintf("jobs/cache/%s", file.Name())) + if err != nil { + return fmt.Errorf("failed to remove file '%s': %v", file.Name(), err) + } + continue + } + + // Second, check if this version of a split job message is the active one, if not then delete + // TBD + } +}