package main import ( "bytes" "context" "crypto/sha256" "encoding/hex" "encoding/json" "errors" "fmt" "io" "log" "os" "path/filepath" "strings" texttospeech "cloud.google.com/go/texttospeech/apiv1" "cloud.google.com/go/texttospeech/apiv1/texttospeechpb" "github.com/hyacinthus/mp3join" ) type Job struct { Name string `json:"name"` Voice string `json:"voice"` Language string `json:"language"` Message string `json:"message"` } func loadJobs(directory string) ([]Job, error) { jobs := make([]Job, 0) err := filepath.Walk(directory, func(path string, info os.FileInfo, err error) error { if !info.IsDir() && strings.HasSuffix(path, ".json") { f, err := os.Open(path) if err != nil { return fmt.Errorf("failed to open file '%s': %v", path, err) } defer f.Close() b, err := io.ReadAll(f) if err != nil { return fmt.Errorf("failed to read file '%s': %v", path, err) } job := Job{} err = json.Unmarshal(b, &job) if err != nil { return fmt.Errorf("failed to unmarshal file '%s' into Job struct: %v", path, err) } jobs = append(jobs, job) } return nil }) if err != nil { return nil, fmt.Errorf("failed to walk current directory for json files: %v", err) } return jobs, nil } func main() { if len(os.Args) > 1 { if os.Args[1] == "list" { err := ListVoices() if err != nil { log.Fatalf("failed to list voices: %v", err) } return } } jobs, err := loadJobs("jobs") if err != nil { log.Fatalf("failed to load jobs: %v", err) } // Check each message if MP3 file already exists, if not then it will synthesize and save the audio to file // file names are SHA256 sums of the spoken text for _, job := range jobs { err = processJob(job) if err != nil { log.Printf("job '%s' failed to process: %v", job.Name, err) } } err = cleanCache(jobs) if err != nil { log.Printf("failed to clean cache: %v", err) } } // Splits a job func processJob(job Job) error { messages, err := splitJob(job.Message, 12) if err != nil { return fmt.Errorf("failed to split job '%s': %v", job.Name, err) } // Process var mp3Files [][]byte for i, message := range messages { h := sha256.New() h.Write([]byte(message)) sha := h.Sum(nil) // "sha" is uint8 type, encoded in base16 filename := fmt.Sprintf("jobs/cache/%s-%d-%s.mp3", job.Name, i, hex.EncodeToString(sha)) var mp3 []byte if _, err := os.Stat(filename); errors.Is(err, os.ErrNotExist) { mp3, err = getTTS(job.Voice, message, job.Language) if err != nil { return fmt.Errorf("failed to get TTS: %v", err) } err = os.WriteFile(filename, mp3, 0644) if err != nil { return fmt.Errorf("failed to write mp3 file '%s': %v", filename, err) } log.Printf("Audio content written to file: %v\n", filename) } else { mp3, err = os.ReadFile(filename) if err != nil { return fmt.Errorf("failed to read mp3 file '%s': %v", filename, err) } } mp3Files = append(mp3Files, mp3) } mp3Joiner := mp3join.New() for i, mp3 := range mp3Files { r := bytes.NewReader(mp3) err := mp3Joiner.Append(r) if err != nil { return fmt.Errorf("failed to join mp3 file '%d': %v", i, err) } } dest := mp3Joiner.Reader() combinedBytes, err := io.ReadAll(dest) if err != nil { return fmt.Errorf("failed to read combined bytes: %v", err) } f, err := os.Create(fmt.Sprintf("jobs/%s.mp3", job.Name)) if err != nil { return fmt.Errorf("failed to create joined mp3 file: %v", err) } defer f.Close() _, err = f.Write(combinedBytes) if err != nil { return fmt.Errorf("failed to write combined bytes to file: %v", err) } return nil } // Splits a job.Message by maxSentences - to get around API limitations on max tokens func splitJob(jobMessage string, maxSentences int) ([]string, error) { sentences := strings.Split(jobMessage, ". ") messages := make([]string, 0) var message string for i, sentence := range sentences { if i%maxSentences == 0 { if len(message) > 0 { messages = append(messages, message) } message = "" } message = fmt.Sprintf("%s %s.", message, sentence) } if len(message) > 0 { messages = append(messages, message) } return messages, nil } // Accepts a string and returns a byteslice of the message in mp3 format, and an error func getTTS(voice, message, language string) ([]byte, error) { // Instantiates a client. ctx := context.Background() client, err := texttospeech.NewClient(ctx) if err != nil { return nil, fmt.Errorf("failed to create TTS client: %v", err) } defer client.Close() // Perform the text-to-speech request on the text input with the selected // voice parameters and audio file type. req := texttospeechpb.SynthesizeSpeechRequest{ // Set the text input to be synthesized. Input: &texttospeechpb.SynthesisInput{ InputSource: &texttospeechpb.SynthesisInput_Text{Text: message}, }, // Build the voice request, select the language code ("en-US") and the SSML // voice gender ("neutral"). Voice: &texttospeechpb.VoiceSelectionParams{ LanguageCode: language, Name: voice, //Name: "en-US-Journey-F", //Name: "tr-TR-Wavenet-E", }, // Select the type of audio file you want returned. AudioConfig: &texttospeechpb.AudioConfig{ AudioEncoding: texttospeechpb.AudioEncoding_MP3, EffectsProfileId: []string{"headphone-class-device"}, SpeakingRate: 0.8, }, } resp, err := client.SynthesizeSpeech(ctx, &req) if err != nil { return nil, fmt.Errorf("failed to synthesize speech: %v", err) } return resp.AudioContent, nil } // ListVoices lists the available text to speech voices. func ListVoices() error { ctx := context.Background() client, err := texttospeech.NewClient(ctx) if err != nil { return err } defer client.Close() // Performs the list voices request. resp, err := client.ListVoices(ctx, &texttospeechpb.ListVoicesRequest{}) if err != nil { return err } for _, voice := range resp.Voices { // Display the voice's name. Example: tpc-vocoded fmt.Printf("Name: %v\n", voice.Name) // Display the supported language codes for this voice. Example: "en-US" for _, languageCode := range voice.LanguageCodes { fmt.Printf(" Supported language: %v\n", languageCode) } // Display the SSML Voice Gender. fmt.Printf(" SSML Voice Gender: %v\n", voice.SsmlGender.String()) // Display the natural sample rate hertz for this voice. Example: 24000 fmt.Printf(" Natural Sample Rate Hertz: %v\n", voice.NaturalSampleRateHertz) } return nil } func cleanCache(jobs []Job) error { cacheFiles, err := os.ReadDir("jobs/cache") if err != nil { return fmt.Errorf("failed to read jobs/cache directory: %v", err) } for _, file := range cacheFiles { if file.IsDir() { continue } if !strings.HasSuffix(file.Name(), ".mp3") { continue } splitName := strings.Split(file.Name(), "-") if len(splitName) < 1 { continue } // First, check if this file even has an active job referencing it, if not then delete var foundActiveJob bool for _, job := range jobs { if job.Name == splitName[0] { foundActiveJob = true break } } if !foundActiveJob { err = os.Remove(fmt.Sprintf("jobs/cache/%s", file.Name())) if err != nil { return fmt.Errorf("failed to remove file '%s': %v", file.Name(), err) } continue } // Second, check if this version of a split job message is the active one, if not then delete // TBD } return nil }