split jobs into several batch messages to stay within TTS API limitations for max tokens
This commit is contained in:
parent
b18c04d491
commit
fa08c95ed2
6
go.mod
6
go.mod
@ -2,7 +2,10 @@ module deadbeef.codes/steven/tts
|
|||||||
|
|
||||||
go 1.22.0
|
go 1.22.0
|
||||||
|
|
||||||
require cloud.google.com/go/texttospeech v1.7.11
|
require (
|
||||||
|
cloud.google.com/go/texttospeech v1.7.11
|
||||||
|
github.com/hyacinthus/mp3join v0.0.0-20190710105654-d46eaeeb9552
|
||||||
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
cloud.google.com/go v0.115.0 // indirect
|
cloud.google.com/go v0.115.0 // indirect
|
||||||
@ -10,6 +13,7 @@ require (
|
|||||||
cloud.google.com/go/auth/oauth2adapt v0.2.3 // indirect
|
cloud.google.com/go/auth/oauth2adapt v0.2.3 // indirect
|
||||||
cloud.google.com/go/compute/metadata v0.5.0 // indirect
|
cloud.google.com/go/compute/metadata v0.5.0 // indirect
|
||||||
cloud.google.com/go/longrunning v0.5.11 // indirect
|
cloud.google.com/go/longrunning v0.5.11 // indirect
|
||||||
|
github.com/dmulholland/mp3lib v0.0.0-20190407131416-50ad4bfbe332 // indirect
|
||||||
github.com/felixge/httpsnoop v1.0.4 // indirect
|
github.com/felixge/httpsnoop v1.0.4 // indirect
|
||||||
github.com/go-logr/logr v1.4.2 // indirect
|
github.com/go-logr/logr v1.4.2 // indirect
|
||||||
github.com/go-logr/stdr v1.2.2 // indirect
|
github.com/go-logr/stdr v1.2.2 // indirect
|
||||||
|
4
go.sum
4
go.sum
@ -18,6 +18,8 @@ github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGX
|
|||||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
github.com/dmulholland/mp3lib v0.0.0-20190407131416-50ad4bfbe332 h1:zh+x3xrRRobJ+O6Jy+u+8+TSj7qzuW4EL8Hkf5cbAck=
|
||||||
|
github.com/dmulholland/mp3lib v0.0.0-20190407131416-50ad4bfbe332/go.mod h1:U3TgSK0lA/gbTgENpBTSNn/OmowG1hr07mKQqqvbLxE=
|
||||||
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
||||||
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
|
||||||
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
|
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
|
||||||
@ -60,6 +62,8 @@ github.com/googleapis/enterprise-certificate-proxy v0.3.2 h1:Vie5ybvEvT75RniqhfF
|
|||||||
github.com/googleapis/enterprise-certificate-proxy v0.3.2/go.mod h1:VLSiSSBs/ksPL8kq3OBOQ6WRI2QnaFynd1DCjZ62+V0=
|
github.com/googleapis/enterprise-certificate-proxy v0.3.2/go.mod h1:VLSiSSBs/ksPL8kq3OBOQ6WRI2QnaFynd1DCjZ62+V0=
|
||||||
github.com/googleapis/gax-go/v2 v2.13.0 h1:yitjD5f7jQHhyDsnhKEBU52NdvvdSeGzlAnDPT0hH1s=
|
github.com/googleapis/gax-go/v2 v2.13.0 h1:yitjD5f7jQHhyDsnhKEBU52NdvvdSeGzlAnDPT0hH1s=
|
||||||
github.com/googleapis/gax-go/v2 v2.13.0/go.mod h1:Z/fvTZXF8/uw7Xu5GuslPw+bplx6SS338j1Is2S+B7A=
|
github.com/googleapis/gax-go/v2 v2.13.0/go.mod h1:Z/fvTZXF8/uw7Xu5GuslPw+bplx6SS338j1Is2S+B7A=
|
||||||
|
github.com/hyacinthus/mp3join v0.0.0-20190710105654-d46eaeeb9552 h1:cjR5hraUrLrNBQ6lXsjd/VDtJf7+3TOow++DaTAj8r8=
|
||||||
|
github.com/hyacinthus/mp3join v0.0.0-20190710105654-d46eaeeb9552/go.mod h1:eQzsT6lJmJ/wcTqoHaHfuadmI1lzaHjrdDLO4qKiqcI=
|
||||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
||||||
|
146
main.go
146
main.go
@ -1,6 +1,7 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"crypto/sha256"
|
"crypto/sha256"
|
||||||
"encoding/hex"
|
"encoding/hex"
|
||||||
@ -15,6 +16,7 @@ import (
|
|||||||
|
|
||||||
texttospeech "cloud.google.com/go/texttospeech/apiv1"
|
texttospeech "cloud.google.com/go/texttospeech/apiv1"
|
||||||
"cloud.google.com/go/texttospeech/apiv1/texttospeechpb"
|
"cloud.google.com/go/texttospeech/apiv1/texttospeechpb"
|
||||||
|
"github.com/hyacinthus/mp3join"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Job struct {
|
type Job struct {
|
||||||
@ -28,11 +30,11 @@ func loadJobs(directory string) ([]Job, error) {
|
|||||||
jobs := make([]Job, 0)
|
jobs := make([]Job, 0)
|
||||||
err := filepath.Walk(directory, func(path string, info os.FileInfo, err error) error {
|
err := filepath.Walk(directory, func(path string, info os.FileInfo, err error) error {
|
||||||
if !info.IsDir() && strings.HasSuffix(path, ".json") {
|
if !info.IsDir() && strings.HasSuffix(path, ".json") {
|
||||||
f, err := os.Open(fmt.Sprintf("%s", path))
|
f, err := os.Open(path)
|
||||||
defer f.Close()
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to open file '%s': %v", path, err)
|
return fmt.Errorf("failed to open file '%s': %v", path, err)
|
||||||
}
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
b, err := io.ReadAll(f)
|
b, err := io.ReadAll(f)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -75,27 +77,109 @@ func main() {
|
|||||||
// Check each message if MP3 file already exists, if not then it will synthesize and save the audio to file
|
// Check each message if MP3 file already exists, if not then it will synthesize and save the audio to file
|
||||||
// file names are SHA256 sums of the spoken text
|
// file names are SHA256 sums of the spoken text
|
||||||
for _, job := range jobs {
|
for _, job := range jobs {
|
||||||
|
err = processJob(job)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("job '%s' failed to process: %v", job.Name, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
err = cleanCache(jobs)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("failed to clean cache: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Splits a job
|
||||||
|
func processJob(job Job) error {
|
||||||
|
messages, err := splitJob(job.Message, 12)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to split job '%s': %v", job.Name, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process
|
||||||
|
|
||||||
|
var mp3Files [][]byte
|
||||||
|
|
||||||
|
for i, message := range messages {
|
||||||
|
|
||||||
h := sha256.New()
|
h := sha256.New()
|
||||||
h.Write([]byte(job.Message))
|
h.Write([]byte(message))
|
||||||
sha := h.Sum(nil) // "sha" is uint8 type, encoded in base16
|
sha := h.Sum(nil) // "sha" is uint8 type, encoded in base16
|
||||||
|
|
||||||
filename := fmt.Sprintf("%s-%s.mp3", job.Name, hex.EncodeToString(sha))
|
filename := fmt.Sprintf("jobs/cache/%s-%d-%s.mp3", job.Name, i, hex.EncodeToString(sha))
|
||||||
|
var mp3 []byte
|
||||||
|
|
||||||
if _, err := os.Stat(fmt.Sprintf("%s", filename)); errors.Is(err, os.ErrNotExist) {
|
if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {
|
||||||
mp3, err := getTTS(job.Voice, job.Message, job.Language)
|
mp3, err = getTTS(job.Voice, message, job.Language)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("failed to get TTS: %v", err)
|
return fmt.Errorf("failed to get TTS: %v", err)
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
|
|
||||||
err = os.WriteFile(fmt.Sprintf("%s", filename), mp3, 0644)
|
err = os.WriteFile(filename, mp3, 0644)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("failed to write mp3 file: %v", err)
|
return fmt.Errorf("failed to write mp3 file '%s': %v", filename, err)
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
log.Printf("Audio content written to file: %v\n", filename)
|
log.Printf("Audio content written to file: %v\n", filename)
|
||||||
|
} else {
|
||||||
|
mp3, err = os.ReadFile(filename)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to read mp3 file '%s': %v", filename, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
mp3Files = append(mp3Files, mp3)
|
||||||
|
}
|
||||||
|
|
||||||
|
mp3Joiner := mp3join.New()
|
||||||
|
|
||||||
|
for i, mp3 := range mp3Files {
|
||||||
|
r := bytes.NewReader(mp3)
|
||||||
|
err := mp3Joiner.Append(r)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to join mp3 file '%d': %v", i, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
dest := mp3Joiner.Reader()
|
||||||
|
|
||||||
|
combinedBytes, err := io.ReadAll(dest)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to read combined bytes: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
f, err := os.Create(fmt.Sprintf("jobs/%s.mp3", job.Name))
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to create joined mp3 file: %v", err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
_, err = f.Write(combinedBytes)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to write combined bytes to file: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// splitJob splits jobMessage into chunks of at most maxSentences sentences
// each, to get around the TTS API's limit on the size of a single request.
//
// Sentences are delimited by ". " (period followed by a space). Surrounding
// whitespace is trimmed and empty sentences are dropped. Within a chunk the
// sentences are re-joined with ". ", and a trailing period is added only when
// the chunk does not already end in '.', '!' or '?', so the final sentence of
// the message does not get a doubled period.
//
// It returns an error if maxSentences is not positive. An empty or
// whitespace-only message yields an empty slice.
func splitJob(jobMessage string, maxSentences int) ([]string, error) {
	if maxSentences <= 0 {
		return nil, fmt.Errorf("maxSentences must be positive, got %d", maxSentences)
	}

	var sentences []string
	for _, sentence := range strings.Split(jobMessage, ". ") {
		if sentence = strings.TrimSpace(sentence); sentence != "" {
			sentences = append(sentences, sentence)
		}
	}

	messages := make([]string, 0)
	for start := 0; start < len(sentences); start += maxSentences {
		// Compare against the remaining count rather than computing
		// start+maxSentences first, so a very large maxSentences cannot overflow.
		end := len(sentences)
		if end-start > maxSentences {
			end = start + maxSentences
		}

		message := strings.Join(sentences[start:end], ". ")
		if !strings.HasSuffix(message, ".") && !strings.HasSuffix(message, "!") && !strings.HasSuffix(message, "?") {
			message += "."
		}
		messages = append(messages, message)
	}

	return messages, nil
}
|
||||||
|
|
||||||
// Accepts a string and returns a byteslice of the message in mp3 format, and an error
|
// Accepts a string and returns a byteslice of the message in mp3 format, and an error
|
||||||
@ -176,3 +260,43 @@ func ListVoices() error {
|
|||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func cleanCache(jobs []Job) error {
|
||||||
|
cacheFiles, err := os.ReadDir("jobs/cache")
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to read jobs/cache directory: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, file := range cacheFiles {
|
||||||
|
if file.IsDir() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if !strings.HasSuffix(file.Name(), ".mp3") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
splitName := strings.Split(file.Name(), "-")
|
||||||
|
if len(splitName < 1) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// First, check if this file even has an active job referencing it, if not then delete
|
||||||
|
var foundActiveJob bool
|
||||||
|
for _, job := range jobs {
|
||||||
|
if job.Name == splitName[0] {
|
||||||
|
foundActiveJob = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !foundActiveJob {
|
||||||
|
err = os.Remove(fmt.Sprintf("jobs/cache/%s", file.Name()))
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to remove file '%s': %v", file.Name(), err)
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Second, check if this version of a split job message is the active one, if not then delete
|
||||||
|
// TBD
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user