split jobs into several batch messages to stay within TTS API limitations for max tokens
This commit is contained in:
parent
b18c04d491
commit
fa08c95ed2
6
go.mod
6
go.mod
@ -2,7 +2,10 @@ module deadbeef.codes/steven/tts
|
||||
|
||||
go 1.22.0
|
||||
|
||||
require cloud.google.com/go/texttospeech v1.7.11
|
||||
require (
|
||||
cloud.google.com/go/texttospeech v1.7.11
|
||||
github.com/hyacinthus/mp3join v0.0.0-20190710105654-d46eaeeb9552
|
||||
)
|
||||
|
||||
require (
|
||||
cloud.google.com/go v0.115.0 // indirect
|
||||
@ -10,6 +13,7 @@ require (
|
||||
cloud.google.com/go/auth/oauth2adapt v0.2.3 // indirect
|
||||
cloud.google.com/go/compute/metadata v0.5.0 // indirect
|
||||
cloud.google.com/go/longrunning v0.5.11 // indirect
|
||||
github.com/dmulholland/mp3lib v0.0.0-20190407131416-50ad4bfbe332 // indirect
|
||||
github.com/felixge/httpsnoop v1.0.4 // indirect
|
||||
github.com/go-logr/logr v1.4.2 // indirect
|
||||
github.com/go-logr/stdr v1.2.2 // indirect
|
||||
|
4
go.sum
4
go.sum
@ -18,6 +18,8 @@ github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGX
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/dmulholland/mp3lib v0.0.0-20190407131416-50ad4bfbe332 h1:zh+x3xrRRobJ+O6Jy+u+8+TSj7qzuW4EL8Hkf5cbAck=
|
||||
github.com/dmulholland/mp3lib v0.0.0-20190407131416-50ad4bfbe332/go.mod h1:U3TgSK0lA/gbTgENpBTSNn/OmowG1hr07mKQqqvbLxE=
|
||||
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
||||
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
||||
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
|
||||
@ -60,6 +62,8 @@ github.com/googleapis/enterprise-certificate-proxy v0.3.2 h1:Vie5ybvEvT75RniqhfF
|
||||
github.com/googleapis/enterprise-certificate-proxy v0.3.2/go.mod h1:VLSiSSBs/ksPL8kq3OBOQ6WRI2QnaFynd1DCjZ62+V0=
|
||||
github.com/googleapis/gax-go/v2 v2.13.0 h1:yitjD5f7jQHhyDsnhKEBU52NdvvdSeGzlAnDPT0hH1s=
|
||||
github.com/googleapis/gax-go/v2 v2.13.0/go.mod h1:Z/fvTZXF8/uw7Xu5GuslPw+bplx6SS338j1Is2S+B7A=
|
||||
github.com/hyacinthus/mp3join v0.0.0-20190710105654-d46eaeeb9552 h1:cjR5hraUrLrNBQ6lXsjd/VDtJf7+3TOow++DaTAj8r8=
|
||||
github.com/hyacinthus/mp3join v0.0.0-20190710105654-d46eaeeb9552/go.mod h1:eQzsT6lJmJ/wcTqoHaHfuadmI1lzaHjrdDLO4qKiqcI=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
||||
|
146
main.go
146
main.go
@ -1,6 +1,7 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
@ -15,6 +16,7 @@ import (
|
||||
|
||||
texttospeech "cloud.google.com/go/texttospeech/apiv1"
|
||||
"cloud.google.com/go/texttospeech/apiv1/texttospeechpb"
|
||||
"github.com/hyacinthus/mp3join"
|
||||
)
|
||||
|
||||
type Job struct {
|
||||
@ -28,11 +30,11 @@ func loadJobs(directory string) ([]Job, error) {
|
||||
jobs := make([]Job, 0)
|
||||
err := filepath.Walk(directory, func(path string, info os.FileInfo, err error) error {
|
||||
if !info.IsDir() && strings.HasSuffix(path, ".json") {
|
||||
f, err := os.Open(fmt.Sprintf("%s", path))
|
||||
defer f.Close()
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open file '%s': %v", path, err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
b, err := io.ReadAll(f)
|
||||
if err != nil {
|
||||
@ -75,27 +77,109 @@ func main() {
|
||||
// Check each message if MP3 file already exists, if not then it will synthesize and save the audio to file
|
||||
// file names are SHA256 sums of the spoken text
|
||||
for _, job := range jobs {
|
||||
err = processJob(job)
|
||||
if err != nil {
|
||||
log.Printf("job '%s' failed to process: %v", job.Name, err)
|
||||
}
|
||||
}
|
||||
|
||||
err = cleanCache(jobs)
|
||||
if err != nil {
|
||||
log.Printf("failed to clean cache: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Splits a job
|
||||
func processJob(job Job) error {
|
||||
messages, err := splitJob(job.Message, 12)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to split job '%s': %v", job.Name, err)
|
||||
}
|
||||
|
||||
// Process
|
||||
|
||||
var mp3Files [][]byte
|
||||
|
||||
for i, message := range messages {
|
||||
|
||||
h := sha256.New()
|
||||
h.Write([]byte(job.Message))
|
||||
h.Write([]byte(message))
|
||||
sha := h.Sum(nil) // "sha" is uint8 type, encoded in base16
|
||||
|
||||
filename := fmt.Sprintf("%s-%s.mp3", job.Name, hex.EncodeToString(sha))
|
||||
filename := fmt.Sprintf("jobs/cache/%s-%d-%s.mp3", job.Name, i, hex.EncodeToString(sha))
|
||||
var mp3 []byte
|
||||
|
||||
if _, err := os.Stat(fmt.Sprintf("%s", filename)); errors.Is(err, os.ErrNotExist) {
|
||||
mp3, err := getTTS(job.Voice, job.Message, job.Language)
|
||||
if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {
|
||||
mp3, err = getTTS(job.Voice, message, job.Language)
|
||||
if err != nil {
|
||||
log.Printf("failed to get TTS: %v", err)
|
||||
continue
|
||||
return fmt.Errorf("failed to get TTS: %v", err)
|
||||
}
|
||||
|
||||
err = os.WriteFile(fmt.Sprintf("%s", filename), mp3, 0644)
|
||||
err = os.WriteFile(filename, mp3, 0644)
|
||||
if err != nil {
|
||||
log.Printf("failed to write mp3 file: %v", err)
|
||||
continue
|
||||
return fmt.Errorf("failed to write mp3 file '%s': %v", filename, err)
|
||||
}
|
||||
log.Printf("Audio content written to file: %v\n", filename)
|
||||
} else {
|
||||
mp3, err = os.ReadFile(filename)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read mp3 file '%s': %v", filename, err)
|
||||
}
|
||||
}
|
||||
mp3Files = append(mp3Files, mp3)
|
||||
}
|
||||
|
||||
mp3Joiner := mp3join.New()
|
||||
|
||||
for i, mp3 := range mp3Files {
|
||||
r := bytes.NewReader(mp3)
|
||||
err := mp3Joiner.Append(r)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to join mp3 file '%d': %v", i, err)
|
||||
}
|
||||
}
|
||||
|
||||
dest := mp3Joiner.Reader()
|
||||
|
||||
combinedBytes, err := io.ReadAll(dest)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read combined bytes: %v", err)
|
||||
}
|
||||
|
||||
f, err := os.Create(fmt.Sprintf("jobs/%s.mp3", job.Name))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create joined mp3 file: %v", err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
_, err = f.Write(combinedBytes)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write combined bytes to file: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// splitJob breaks jobMessage into chunks of at most maxSentences sentences
// each, to get around API limitations on the size of a single TTS request.
//
// Sentences are delimited by ". " (period followed by a space). Within a
// chunk the sentences are re-joined with that same delimiter, and a chunk that
// ends at a delimiter gets its period back, so the chunks contain exactly the
// text of the (whitespace-trimmed) message and nothing more: no leading
// space and no doubled period after the final sentence.
//
// It returns an error when maxSentences is less than 1 or when jobMessage
// contains no text.
func splitJob(jobMessage string, maxSentences int) ([]string, error) {
	if maxSentences < 1 {
		return nil, fmt.Errorf("maxSentences must be at least 1, got %d", maxSentences)
	}

	text := strings.TrimSpace(jobMessage)
	if text == "" {
		return nil, errors.New("job message is empty")
	}

	const delimiter = ". "
	sentences := strings.Split(text, delimiter)

	var messages []string
	for start := 0; start < len(sentences); start += maxSentences {
		end := len(sentences)
		// Compare against the remaining count so start+maxSentences is never
		// computed and cannot overflow for very large maxSentences.
		if maxSentences < end-start {
			end = start + maxSentences
		}

		chunk := strings.Join(sentences[start:end], delimiter)
		if end < len(sentences) {
			// The split consumed the ". " that followed this chunk's last
			// sentence; restore the period. The final chunk lost nothing.
			chunk += "."
		}
		messages = append(messages, chunk)
	}

	return messages, nil
}
|
||||
|
||||
// Accepts a string and returns a byteslice of the message in mp3 format, and an error
|
||||
@ -176,3 +260,43 @@ func ListVoices() error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func cleanCache(jobs []Job) error {
|
||||
cacheFiles, err := os.ReadDir("jobs/cache")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read jobs/cache directory: %v", err)
|
||||
}
|
||||
|
||||
for _, file := range cacheFiles {
|
||||
if file.IsDir() {
|
||||
continue
|
||||
}
|
||||
if !strings.HasSuffix(file.Name(), ".mp3") {
|
||||
continue
|
||||
}
|
||||
|
||||
splitName := strings.Split(file.Name(), "-")
|
||||
if len(splitName < 1) {
|
||||
continue
|
||||
}
|
||||
|
||||
// First, check if this file even has an active job referencing it, if not then delete
|
||||
var foundActiveJob bool
|
||||
for _, job := range jobs {
|
||||
if job.Name == splitName[0] {
|
||||
foundActiveJob = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !foundActiveJob {
|
||||
err = os.Remove(fmt.Sprintf("jobs/cache/%s", file.Name()))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to remove file '%s': %v", file.Name(), err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Second, check if this version of a split job message is the active one, if not then delete
|
||||
// TBD
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user