2024-08-04 04:13:29 +00:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
2024-08-07 00:01:23 +00:00
|
|
|
"bytes"
|
2024-08-04 04:13:29 +00:00
|
|
|
"context"
|
|
|
|
"crypto/sha256"
|
|
|
|
"encoding/hex"
|
|
|
|
"encoding/json"
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"log"
|
|
|
|
"os"
|
|
|
|
"path/filepath"
|
|
|
|
"strings"
|
|
|
|
|
|
|
|
texttospeech "cloud.google.com/go/texttospeech/apiv1"
|
|
|
|
"cloud.google.com/go/texttospeech/apiv1/texttospeechpb"
|
2024-09-04 21:49:05 +00:00
|
|
|
"github.com/moutend/go-wav"
|
2024-08-04 04:13:29 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// Job describes a single text-to-speech task. One Job is decoded from each
// ".json" file found by loadJobs.
type Job struct {
	// Name identifies the job. It is used as the prefix of the job's cache
	// file names and as the base name of the joined output file.
	Name string `json:"name"`

	// Voice is the voice name sent with the synthesis request.
	Voice string `json:"voice"`

	// Language is the language code sent with the synthesis request.
	Language string `json:"language"`

	// Message is the full text to be spoken; it is split into chunks before
	// being synthesized.
	Message string `json:"message"`
}
|
|
|
|
|
|
|
|
func loadJobs(directory string) ([]Job, error) {
|
|
|
|
jobs := make([]Job, 0)
|
|
|
|
err := filepath.Walk(directory, func(path string, info os.FileInfo, err error) error {
|
|
|
|
if !info.IsDir() && strings.HasSuffix(path, ".json") {
|
2024-08-07 00:01:23 +00:00
|
|
|
f, err := os.Open(path)
|
2024-08-04 04:13:29 +00:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed to open file '%s': %v", path, err)
|
|
|
|
}
|
2024-08-07 00:01:23 +00:00
|
|
|
defer f.Close()
|
2024-08-04 04:13:29 +00:00
|
|
|
|
|
|
|
b, err := io.ReadAll(f)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed to read file '%s': %v", path, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
job := Job{}
|
|
|
|
err = json.Unmarshal(b, &job)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed to unmarshal file '%s' into Job struct: %v", path, err)
|
|
|
|
}
|
|
|
|
jobs = append(jobs, job)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
})
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to walk current directory for json files: %v", err)
|
|
|
|
}
|
|
|
|
return jobs, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func main() {
|
|
|
|
|
|
|
|
if len(os.Args) > 1 {
|
|
|
|
if os.Args[1] == "list" {
|
|
|
|
err := ListVoices()
|
|
|
|
if err != nil {
|
|
|
|
log.Fatalf("failed to list voices: %v", err)
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
jobs, err := loadJobs("jobs")
|
|
|
|
if err != nil {
|
|
|
|
log.Fatalf("failed to load jobs: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check each message if MP3 file already exists, if not then it will synthesize and save the audio to file
|
|
|
|
// file names are SHA256 sums of the spoken text
|
|
|
|
for _, job := range jobs {
|
2024-08-07 00:01:23 +00:00
|
|
|
err = processJob(job)
|
|
|
|
if err != nil {
|
|
|
|
log.Printf("job '%s' failed to process: %v", job.Name, err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
err = cleanCache(jobs)
|
|
|
|
if err != nil {
|
|
|
|
log.Printf("failed to clean cache: %v", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Splits a job
|
|
|
|
func processJob(job Job) error {
|
|
|
|
messages, err := splitJob(job.Message, 12)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed to split job '%s': %v", job.Name, err)
|
|
|
|
}
|
|
|
|
|
2024-09-04 21:49:05 +00:00
|
|
|
outWav, err := wav.New(24000, 16, 1)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed to create output wav: %v", err)
|
|
|
|
}
|
2024-08-07 00:01:23 +00:00
|
|
|
|
2024-09-04 21:49:05 +00:00
|
|
|
// Process
|
2024-08-07 00:01:23 +00:00
|
|
|
for i, message := range messages {
|
|
|
|
|
2024-08-04 04:13:29 +00:00
|
|
|
h := sha256.New()
|
2024-08-07 00:01:23 +00:00
|
|
|
h.Write([]byte(message))
|
2024-08-04 04:13:29 +00:00
|
|
|
sha := h.Sum(nil) // "sha" is uint8 type, encoded in base16
|
|
|
|
|
2024-09-04 21:49:05 +00:00
|
|
|
filename := fmt.Sprintf("jobs/cache/%s-%d-%s.wav", job.Name, i, hex.EncodeToString(sha))
|
|
|
|
var wav []byte
|
2024-08-07 00:01:23 +00:00
|
|
|
if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) {
|
2024-09-04 21:49:05 +00:00
|
|
|
wav, err = getTTS(job.Voice, message, job.Language)
|
2024-08-04 04:13:29 +00:00
|
|
|
if err != nil {
|
2024-08-07 00:01:23 +00:00
|
|
|
return fmt.Errorf("failed to get TTS: %v", err)
|
2024-08-04 04:13:29 +00:00
|
|
|
}
|
|
|
|
|
2024-09-04 21:49:05 +00:00
|
|
|
err = os.WriteFile(filename, wav, 0644)
|
2024-08-04 04:13:29 +00:00
|
|
|
if err != nil {
|
2024-09-04 21:49:05 +00:00
|
|
|
return fmt.Errorf("failed to write wav file '%s': %v", filename, err)
|
2024-08-04 04:13:29 +00:00
|
|
|
}
|
|
|
|
log.Printf("Audio content written to file: %v\n", filename)
|
2024-08-07 00:01:23 +00:00
|
|
|
} else {
|
2024-09-04 21:49:05 +00:00
|
|
|
wav, err = os.ReadFile(filename)
|
2024-08-07 00:01:23 +00:00
|
|
|
if err != nil {
|
2024-09-04 21:49:05 +00:00
|
|
|
return fmt.Errorf("failed to read wav file '%s': %v", filename, err)
|
2024-08-07 00:01:23 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-09-04 21:49:05 +00:00
|
|
|
r := bytes.NewReader(wav)
|
|
|
|
_, err = io.Copy(outWav, r)
|
2024-08-07 00:01:23 +00:00
|
|
|
if err != nil {
|
2024-09-04 21:49:05 +00:00
|
|
|
return fmt.Errorf("failed to copy message wav to output wav: %v", err)
|
2024-08-07 00:01:23 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-09-04 21:49:05 +00:00
|
|
|
b, err := wav.Marshal(outWav)
|
2024-08-07 00:01:23 +00:00
|
|
|
if err != nil {
|
2024-09-04 21:49:05 +00:00
|
|
|
return fmt.Errorf("failed to marshal outWav to bytes: %v", err)
|
2024-08-07 00:01:23 +00:00
|
|
|
}
|
|
|
|
|
2024-09-04 21:49:05 +00:00
|
|
|
f, err := os.Create(fmt.Sprintf("jobs/%s.wav", job.Name))
|
2024-08-07 00:01:23 +00:00
|
|
|
if err != nil {
|
2024-09-04 21:49:05 +00:00
|
|
|
return fmt.Errorf("failed to create joined wav file: %v", err)
|
2024-08-07 00:01:23 +00:00
|
|
|
}
|
|
|
|
defer f.Close()
|
|
|
|
|
2024-09-04 21:49:05 +00:00
|
|
|
_, err = f.Write(b)
|
2024-08-07 00:01:23 +00:00
|
|
|
if err != nil {
|
2024-09-04 21:49:05 +00:00
|
|
|
return fmt.Errorf("failed to write outWav bytes to file: %v", err)
|
2024-08-07 00:01:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// splitJob breaks jobMessage into chunks of at most maxSentences sentences,
// to stay within the API's limit on input size per request.
//
// Sentences are delimited by ". ". Each sentence is trimmed of surrounding
// whitespace, empty sentences are dropped, and a period is appended only when
// the sentence does not already end in '.', '!' or '?'. The sentences of a
// chunk are joined with a single space, so a chunk has no leading or trailing
// whitespace.
//
// It returns an error when maxSentences is less than 1. An empty (or
// whitespace-only) jobMessage yields an empty slice.
func splitJob(jobMessage string, maxSentences int) ([]string, error) {
	if maxSentences < 1 {
		return nil, fmt.Errorf("maxSentences must be at least 1, got %d", maxSentences)
	}

	parts := strings.Split(jobMessage, ". ")
	sentences := make([]string, 0, len(parts))
	for _, part := range parts {
		sentence := strings.TrimSpace(part)
		if sentence == "" {
			continue
		}
		// Splitting consumed the period of every sentence except possibly
		// the last, so restore it only where it is actually missing.
		if strings.IndexByte(".!?", sentence[len(sentence)-1]) < 0 {
			sentence += "."
		}
		sentences = append(sentences, sentence)
	}

	messages := make([]string, 0)
	for start := 0; start < len(sentences); {
		end := len(sentences)
		if maxSentences < end-start {
			end = start + maxSentences
		}
		messages = append(messages, strings.Join(sentences[start:end], " "))
		start = end
	}

	return messages, nil
}
|
|
|
|
|
2024-09-04 21:49:05 +00:00
|
|
|
// Accepts a string and returns a byteslice of the message in wav format, and an error
|
2024-08-04 04:13:29 +00:00
|
|
|
func getTTS(voice, message, language string) ([]byte, error) {
|
|
|
|
|
|
|
|
// Instantiates a client.
|
|
|
|
ctx := context.Background()
|
|
|
|
|
|
|
|
client, err := texttospeech.NewClient(ctx)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to create TTS client: %v", err)
|
|
|
|
}
|
|
|
|
defer client.Close()
|
|
|
|
|
|
|
|
// Perform the text-to-speech request on the text input with the selected
|
|
|
|
// voice parameters and audio file type.
|
|
|
|
req := texttospeechpb.SynthesizeSpeechRequest{
|
|
|
|
// Set the text input to be synthesized.
|
|
|
|
Input: &texttospeechpb.SynthesisInput{
|
|
|
|
InputSource: &texttospeechpb.SynthesisInput_Text{Text: message},
|
|
|
|
},
|
|
|
|
// Build the voice request, select the language code ("en-US") and the SSML
|
|
|
|
// voice gender ("neutral").
|
|
|
|
Voice: &texttospeechpb.VoiceSelectionParams{
|
|
|
|
LanguageCode: language,
|
|
|
|
Name: voice,
|
|
|
|
//Name: "en-US-Journey-F",
|
|
|
|
//Name: "tr-TR-Wavenet-E",
|
|
|
|
},
|
|
|
|
// Select the type of audio file you want returned.
|
|
|
|
AudioConfig: &texttospeechpb.AudioConfig{
|
2024-09-04 21:49:05 +00:00
|
|
|
AudioEncoding: texttospeechpb.AudioEncoding_LINEAR16,
|
2024-08-04 04:13:29 +00:00
|
|
|
EffectsProfileId: []string{"headphone-class-device"},
|
2024-09-04 21:49:05 +00:00
|
|
|
//SpeakingRate: 0.8,
|
2024-08-04 04:13:29 +00:00
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
resp, err := client.SynthesizeSpeech(ctx, &req)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("failed to synthesize speech: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return resp.AudioContent, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// ListVoices lists the available text to speech voices.
|
|
|
|
func ListVoices() error {
|
|
|
|
ctx := context.Background()
|
|
|
|
|
|
|
|
client, err := texttospeech.NewClient(ctx)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer client.Close()
|
|
|
|
|
|
|
|
// Performs the list voices request.
|
|
|
|
resp, err := client.ListVoices(ctx, &texttospeechpb.ListVoicesRequest{})
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, voice := range resp.Voices {
|
|
|
|
// Display the voice's name. Example: tpc-vocoded
|
|
|
|
fmt.Printf("Name: %v\n", voice.Name)
|
|
|
|
|
|
|
|
// Display the supported language codes for this voice. Example: "en-US"
|
|
|
|
for _, languageCode := range voice.LanguageCodes {
|
|
|
|
fmt.Printf(" Supported language: %v\n", languageCode)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Display the SSML Voice Gender.
|
|
|
|
fmt.Printf(" SSML Voice Gender: %v\n", voice.SsmlGender.String())
|
|
|
|
|
|
|
|
// Display the natural sample rate hertz for this voice. Example: 24000
|
|
|
|
fmt.Printf(" Natural Sample Rate Hertz: %v\n",
|
|
|
|
voice.NaturalSampleRateHertz)
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
2024-08-07 00:01:23 +00:00
|
|
|
|
|
|
|
func cleanCache(jobs []Job) error {
|
|
|
|
cacheFiles, err := os.ReadDir("jobs/cache")
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed to read jobs/cache directory: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, file := range cacheFiles {
|
|
|
|
if file.IsDir() {
|
|
|
|
continue
|
|
|
|
}
|
2024-09-04 21:49:05 +00:00
|
|
|
if !strings.HasSuffix(file.Name(), ".wav") {
|
2024-08-07 00:01:23 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
splitName := strings.Split(file.Name(), "-")
|
2024-08-22 22:29:14 +00:00
|
|
|
if len(splitName) < 1 {
|
2024-08-07 00:01:23 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// First, check if this file even has an active job referencing it, if not then delete
|
|
|
|
var foundActiveJob bool
|
|
|
|
for _, job := range jobs {
|
|
|
|
if job.Name == splitName[0] {
|
|
|
|
foundActiveJob = true
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if !foundActiveJob {
|
|
|
|
err = os.Remove(fmt.Sprintf("jobs/cache/%s", file.Name()))
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("failed to remove file '%s': %v", file.Name(), err)
|
|
|
|
}
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// Second, check if this version of a split job message is the active one, if not then delete
|
|
|
|
// TBD
|
|
|
|
}
|
2024-08-22 22:29:14 +00:00
|
|
|
|
|
|
|
return nil
|
2024-08-07 00:01:23 +00:00
|
|
|
}
|