// tts/main.go

package main

import (
	"context"
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"log"
	"os"
	"path/filepath"
	"strings"

	texttospeech "cloud.google.com/go/texttospeech/apiv1"
	"cloud.google.com/go/texttospeech/apiv1/texttospeechpb"
)

// Job describes one text-to-speech request. Each Job is decoded from a JSON
// file using the field names given in the struct tags.
type Job struct {
	// Name is used as the prefix of the generated MP3 file name.
	Name     string `json:"name"`
	// Voice is sent as the voice name in the synthesis request.
	Voice    string `json:"voice"`
	// Language is sent as the language code in the synthesis request.
	Language string `json:"language"`
	// Message is the text to synthesize; its SHA-256 digest also forms part
	// of the generated MP3 file name.
	Message  string `json:"message"`
}

// loadJobs recursively walks directory and decodes every non-directory entry
// whose path ends in ".json" into a Job, in the order filepath.Walk visits
// them.
//
// On success it returns a non-nil (possibly empty) slice. The first failure —
// an entry that cannot be accessed (including a missing root directory),
// opened, read, or unmarshalled — aborts the walk and is returned wrapped with
// %w, so callers can inspect the cause with errors.Is / errors.As.
func loadJobs(directory string) ([]Job, error) {
	jobs := make([]Job, 0)
	err := filepath.Walk(directory, func(path string, info os.FileInfo, err error) error {
		// Walk reports a failure to stat or list an entry by calling us with a
		// non-nil err; info may be nil in that case, so bail out before using it.
		if err != nil {
			return fmt.Errorf("failed to access '%s': %w", path, err)
		}
		if info.IsDir() || !strings.HasSuffix(path, ".json") {
			return nil
		}

		f, err := os.Open(path)
		if err != nil {
			return fmt.Errorf("failed to open file '%s': %w", path, err)
		}
		// Registered only after a successful open. The callback returns once
		// per visited entry, so the handle is released before the next file.
		defer f.Close()

		b, err := io.ReadAll(f)
		if err != nil {
			return fmt.Errorf("failed to read file '%s': %w", path, err)
		}

		var job Job
		if err := json.Unmarshal(b, &job); err != nil {
			return fmt.Errorf("failed to unmarshal file '%s' into Job struct: %w", path, err)
		}
		jobs = append(jobs, job)
		return nil
	})
	if err != nil {
		return nil, fmt.Errorf("failed to walk directory '%s' for json files: %w", directory, err)
	}
	return jobs, nil
}

// main is the program entry point.
//
// When the first command-line argument is "list" it prints the available
// voices and exits. Otherwise it loads every job from the "jobs" directory
// and, for each job whose output file does not exist yet, synthesizes the
// message and writes the MP3 into the current working directory.
//
// Output files are named "<job name>-<hex SHA-256 of the message>.mp3", so a
// job is re-synthesized only when its name or message changes. Failures that
// affect a single job are logged and the loop moves on to the next job;
// failures to list voices or to load jobs terminate the program.
func main() {
	if len(os.Args) > 1 && os.Args[1] == "list" {
		if err := ListVoices(); err != nil {
			log.Fatalf("failed to list voices: %v", err)
		}
		return
	}

	jobs, err := loadJobs("jobs")
	if err != nil {
		log.Fatalf("failed to load jobs: %v", err)
	}

	for _, job := range jobs {
		// sum holds the raw 32-byte digest; it is hex-encoded for the file name.
		sum := sha256.Sum256([]byte(job.Message))
		filename := fmt.Sprintf("%s-%s.mp3", job.Name, hex.EncodeToString(sum[:]))

		_, err := os.Stat(filename)
		if err == nil {
			// Audio for this exact name/message pair already exists.
			continue
		}
		if !errors.Is(err, os.ErrNotExist) {
			// Any other stat failure (e.g. permissions) is reported instead of
			// silently skipping the job.
			log.Printf("failed to stat mp3 file '%s': %v", filename, err)
			continue
		}

		mp3, err := getTTS(job.Voice, job.Message, job.Language)
		if err != nil {
			log.Printf("failed to get TTS: %v", err)
			continue
		}

		if err := os.WriteFile(filename, mp3, 0644); err != nil {
			log.Printf("failed to write mp3 file: %v", err)
			continue
		}
		log.Printf("Audio content written to file: %v\n", filename)
	}
}

// getTTS sends message to the Text-to-Speech service and returns the
// synthesized audio as MP3-encoded bytes.
//
// voice is passed as the voice name (for example "en-US-Journey-F" or
// "tr-TR-Wavenet-E") and language as the language code (for example "en-US").
// A new client is created for the call and closed before returning. A non-nil
// error is returned when the client cannot be created or the synthesis
// request fails.
func getTTS(voice, message, language string) ([]byte, error) {
	ctx := context.Background()

	client, err := texttospeech.NewClient(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to create TTS client: %v", err)
	}
	defer client.Close()

	// The plain text to be spoken.
	input := &texttospeechpb.SynthesisInput{
		InputSource: &texttospeechpb.SynthesisInput_Text{Text: message},
	}

	// Which voice to use; no SSML gender is requested.
	selection := &texttospeechpb.VoiceSelectionParams{
		LanguageCode: language,
		Name:         voice,
	}

	// MP3 output with the "headphone-class-device" effects profile and a
	// speaking rate of 0.8.
	// NOTE(review): 0.8 is presumably slower than the service default — confirm
	// against the API documentation.
	audio := &texttospeechpb.AudioConfig{
		AudioEncoding:    texttospeechpb.AudioEncoding_MP3,
		EffectsProfileId: []string{"headphone-class-device"},
		SpeakingRate:     0.8,
	}

	resp, err := client.SynthesizeSpeech(ctx, &texttospeechpb.SynthesizeSpeechRequest{
		Input:       input,
		Voice:       selection,
		AudioConfig: audio,
	})
	if err != nil {
		return nil, fmt.Errorf("failed to synthesize speech: %v", err)
	}

	return resp.AudioContent, nil
}

// ListVoices queries the Text-to-Speech service for its available voices and
// prints, for each one, its name, every supported language code, its SSML
// gender and its natural sample rate in hertz to standard output.
//
// It returns the underlying error unchanged when the client cannot be created
// or the list request fails.
func ListVoices() error {
	ctx := context.Background()

	ttsClient, err := texttospeech.NewClient(ctx)
	if err != nil {
		return err
	}
	defer ttsClient.Close()

	listing, err := ttsClient.ListVoices(ctx, &texttospeechpb.ListVoicesRequest{})
	if err != nil {
		return err
	}

	for _, v := range listing.Voices {
		// Voice name, e.g. "tpc-vocoded".
		fmt.Printf("Name: %v\n", v.Name)

		// One line per supported language code, e.g. "en-US".
		for _, code := range v.LanguageCodes {
			fmt.Printf("  Supported language: %v\n", code)
		}

		fmt.Printf("  SSML Voice Gender: %v\n", v.SsmlGender.String())

		// Natural sample rate, e.g. 24000.
		fmt.Printf("  Natural Sample Rate Hertz: %v\n", v.NaturalSampleRateHertz)
	}

	return nil
}
initial commit 2024-08-04 04:13:29 +00:00			`package main`

			`import (`
			`"context"`
			`"crypto/sha256"`
			`"encoding/hex"`
			`"encoding/json"`
			`"errors"`
			`"fmt"`
			`"io"`
			`"log"`
			`"os"`
			`"path/filepath"`
			`"strings"`

			`texttospeech "cloud.google.com/go/texttospeech/apiv1"`
			`"cloud.google.com/go/texttospeech/apiv1/texttospeechpb"`
			`)`

			`type Job struct {`
			Name string `json:"name"`
			Voice string `json:"voice"`
			Language string `json:"language"`
			Message string `json:"message"`
			`}`

			`func loadJobs(directory string) ([]Job, error) {`
			`jobs := make([]Job, 0)`
			`err := filepath.Walk(directory, func(path string, info os.FileInfo, err error) error {`
			`if !info.IsDir() && strings.HasSuffix(path, ".json") {`
			`f, err := os.Open(fmt.Sprintf("%s", path))`
			`defer f.Close()`
			`if err != nil {`
			`return fmt.Errorf("failed to open file '%s': %v", path, err)`
			`}`

			`b, err := io.ReadAll(f)`
			`if err != nil {`
			`return fmt.Errorf("failed to read file '%s': %v", path, err)`
			`}`

			`job := Job{}`
			`err = json.Unmarshal(b, &job)`
			`if err != nil {`
			`return fmt.Errorf("failed to unmarshal file '%s' into Job struct: %v", path, err)`
			`}`
			`jobs = append(jobs, job)`
			`}`
			`return nil`
			`})`

			`if err != nil {`
			`return nil, fmt.Errorf("failed to walk current directory for json files: %v", err)`
			`}`
			`return jobs, nil`
			`}`

			`func main() {`

			`if len(os.Args) > 1 {`
			`if os.Args[1] == "list" {`
			`err := ListVoices()`
			`if err != nil {`
			`log.Fatalf("failed to list voices: %v", err)`
			`}`
			`return`
			`}`
			`}`

			`jobs, err := loadJobs("jobs")`
			`if err != nil {`
			`log.Fatalf("failed to load jobs: %v", err)`
			`}`

			`// Check each message if MP3 file already exists, if not then it will synthesize and save the audio to file`
			`// file names are SHA256 sums of the spoken text`
			`for _, job := range jobs {`
			`h := sha256.New()`
			`h.Write([]byte(job.Message))`
			`sha := h.Sum(nil) // "sha" is uint8 type, encoded in base16`

			`filename := fmt.Sprintf("%s-%s.mp3", job.Name, hex.EncodeToString(sha))`

			`if _, err := os.Stat(fmt.Sprintf("%s", filename)); errors.Is(err, os.ErrNotExist) {`
			`mp3, err := getTTS(job.Voice, job.Message, job.Language)`
			`if err != nil {`
			`log.Printf("failed to get TTS: %v", err)`
			`continue`
			`}`

			`err = os.WriteFile(fmt.Sprintf("%s", filename), mp3, 0644)`
			`if err != nil {`
			`log.Printf("failed to write mp3 file: %v", err)`
			`continue`
			`}`
			`log.Printf("Audio content written to file: %v\n", filename)`
			`}`
			`}`
			`}`

			`// Accepts a string and returns a byteslice of the message in mp3 format, and an error`
			`func getTTS(voice, message, language string) ([]byte, error) {`

			`// Instantiates a client.`
			`ctx := context.Background()`

			`client, err := texttospeech.NewClient(ctx)`
			`if err != nil {`
			`return nil, fmt.Errorf("failed to create TTS client: %v", err)`
			`}`
			`defer client.Close()`

			`// Perform the text-to-speech request on the text input with the selected`
			`// voice parameters and audio file type.`
			`req := texttospeechpb.SynthesizeSpeechRequest{`
			`// Set the text input to be synthesized.`
			`Input: &texttospeechpb.SynthesisInput{`
			`InputSource: &texttospeechpb.SynthesisInput_Text{Text: message},`
			`},`
			`// Build the voice request, select the language code ("en-US") and the SSML`
			`// voice gender ("neutral").`
			`Voice: &texttospeechpb.VoiceSelectionParams{`
			`LanguageCode: language,`
			`Name: voice,`
			`//Name: "en-US-Journey-F",`
			`//Name: "tr-TR-Wavenet-E",`
			`},`
			`// Select the type of audio file you want returned.`
			`AudioConfig: &texttospeechpb.AudioConfig{`
			`AudioEncoding: texttospeechpb.AudioEncoding_MP3,`
			`EffectsProfileId: []string{"headphone-class-device"},`
			`SpeakingRate: 0.8,`
			`},`
			`}`

			`resp, err := client.SynthesizeSpeech(ctx, &req)`
			`if err != nil {`
			`return nil, fmt.Errorf("failed to synthesize speech: %v", err)`
			`}`

			`return resp.AudioContent, nil`
			`}`

			`// ListVoices lists the available text to speech voices.`
			`func ListVoices() error {`
			`ctx := context.Background()`

			`client, err := texttospeech.NewClient(ctx)`
			`if err != nil {`
			`return err`
			`}`
			`defer client.Close()`

			`// Performs the list voices request.`
			`resp, err := client.ListVoices(ctx, &texttospeechpb.ListVoicesRequest{})`
			`if err != nil {`
			`return err`
			`}`

			`for _, voice := range resp.Voices {`
			`// Display the voice's name. Example: tpc-vocoded`
			`fmt.Printf("Name: %v\n", voice.Name)`

			`// Display the supported language codes for this voice. Example: "en-US"`
			`for _, languageCode := range voice.LanguageCodes {`
			`fmt.Printf(" Supported language: %v\n", languageCode)`
			`}`

			`// Display the SSML Voice Gender.`
			`fmt.Printf(" SSML Voice Gender: %v\n", voice.SsmlGender.String())`

			`// Display the natural sample rate hertz for this voice. Example: 24000`
			`fmt.Printf(" Natural Sample Rate Hertz: %v\n",`
			`voice.NaturalSampleRateHertz)`
			`}`

			`return nil`
			`}`