"""Split a letter into sentences and optionally synthesize each one to a WAV.

Usage:
    python speak.py [letter_series] [letter_year] [real_run]

``letter_series`` defaults to "buffett-partnership" and ``letter_year`` to
"1956".  Without a third argument the script performs a dry run: it only
prints the sentences it found so they can be checked for readability.  With
any non-empty third argument each sentence is sent to a local TTS HTTP
server via ``curl`` and saved as ``output-NNN.wav``.
"""
from urllib.parse import quote
import subprocess
import os
import sys
import re

speaker_id = "p230"

# If hosted on IPv6 use http://[::1]:5002
# If hosted on IPv4 use http://localhost:5002
# The brackets are backslash-escaped because the URL is handed to curl, which
# otherwise treats [...] as a URL-globbing range.
TTS_BASE_URL = "http://\\[::1\\]:5002"

DEFAULT_SERIES = "buffett-partnership"
DEFAULT_YEAR = "1956"

# A sentence boundary is either a word of two or more characters followed by
# "." or ":" and one whitespace character, or a run of two or more whitespace
# characters (e.g. a blank line).
SENTENCE_END_RE = re.compile(r"(\w\w+([\.:][\s\n])|([\s\n][\s\n]+))")


def parse_args(argv):
    """Return ``(letter_series, letter_year, real_run)`` parsed from *argv*.

    Missing or empty positional arguments fall back to the defaults.
    ``real_run`` is true only when exactly three arguments follow the script
    name and the third is a non-empty string (its content is not inspected,
    so "0" or "false" also enable a real run).
    """
    letter_series = (argv[1] if len(argv) > 1 else "") or DEFAULT_SERIES
    letter_year = (argv[2] if len(argv) > 2 else "") or DEFAULT_YEAR
    real_run = len(argv) == 4 and bool(argv[3])
    return letter_series, letter_year, real_run


def letter_paths(letter_series, letter_year):
    """Return ``(input_file, preprocessed_file, wav_dir)`` for one letter."""
    base = f"./{letter_series}/{letter_year}"
    fn = f"{base}/1-in/bpl-{letter_year}-letter.txt"
    pre_fn = f"{base}/2-preprocess/bpl-{letter_year}.txt"
    output_dir = f"{base}/3-wavs"
    return fn, pre_fn, output_dir


def _heal(chunk):
    """Strip *chunk* and join its lines with single spaces.

    Heals newlines left behind by awkward breaks from copy-pasta.
    """
    return " ".join(part.strip() for part in chunk.strip().split("\n"))


def split_sentences(all_text):
    """Split *all_text* into sentences.

    Returns a list of ``(sentence, boundary)`` tuples, where ``boundary`` is
    the regex match that ended the sentence, or ``None`` for trailing text
    that is not followed by any boundary.  Chunks that are empty after
    stripping (for instance a blank-line run right after a sentence end) are
    skipped so that no empty prompt is ever synthesized.
    """
    sentences = []
    start = 0
    for boundary in SENTENCE_END_RE.finditer(all_text):
        sentence = _heal(all_text[start:boundary.end()])
        # No +1 needed: end() is already one past the last matched character.
        start = boundary.end()
        if sentence:
            sentences.append((sentence, boundary))

    # Keep whatever follows the last boundary instead of silently dropping it.
    tail = _heal(all_text[start:])
    if tail:
        sentences.append((tail, None))
    return sentences


def build_query_url(sentence):
    """Return the TTS request URL for *sentence* (percent-encoded)."""
    text_prompt = quote(sentence)
    return (
        f"{TTS_BASE_URL}/api/tts?text={text_prompt}"
        f"&style_wav=&language_id=&speaker_id={speaker_id}"
    )


def synth_sentence(sentence, i, output_dir="."):
    """Synthesize *sentence* with curl into ``output-<i, zero-padded>.wav``.

    The file is written inside *output_dir*.  A non-zero curl exit status is
    reported on stderr; it does not abort the run.
    """
    query_string = build_query_url(sentence)
    print(f"Query string {query_string}")
    wav_path = os.path.join(output_dir, f"output-{str(i).zfill(3)}.wav")
    result = subprocess.run(['curl', query_string, '-o', wav_path])
    if result.returncode != 0:
        print(
            f"curl exited with status {result.returncode} for sentence {i}",
            file=sys.stderr,
        )


def main(argv=None):
    """Run the dry run or the real synthesis for the letter named in *argv*."""
    argv = sys.argv if argv is None else argv
    letter_series, letter_year, real_run = parse_args(argv)
    # The preprocessed path is part of the directory layout but not read here.
    fn, _pre_fn, output_dir = letter_paths(letter_series, letter_year)

    os.makedirs(output_dir, exist_ok=True)

    if not real_run:
        print("Dry run, check that sentences are readable.")

    with open(fn) as f:
        all_text = f.read()

    sentences = split_sentences(all_text)
    print(f"{len(sentences)} sentences found.\n")

    for i, (sentence, sentence_end) in enumerate(sentences):
        print(f"Sentence {i}:\n\t\"{sentence}\"\n\t{sentence_end}")
        if real_run:
            synth_sentence(sentence, i, output_dir)


if __name__ == "__main__":
    main()