|
|
|
|
@ -3,25 +3,46 @@ from urllib.parse import quote
|
|
|
|
|
import subprocess
|
|
|
|
|
import os
|
|
|
|
|
import sys
|
|
|
|
|
import re
|
|
|
|
|
|
|
|
|
|
# Speaker id sent to the TTS server as the ``speaker_id`` query parameter.
speaker_id = "p230"


def _arg_or_default(position, default):
    """Return ``sys.argv[position]`` if it is present and non-empty, else *default*."""
    if len(sys.argv) > position and sys.argv[position]:
        return sys.argv[position]
    return default


# Usage: <script> [letter_series] [letter_year] [real_run]
# Indexing sys.argv directly raised IndexError when an argument was omitted,
# so the "or <default>" fallbacks could never apply; check the length first.
letter_series = _arg_or_default(1, "buffett-partnership")
letter_year = _arg_or_default(2, "1956")
# A real run requires exactly three arguments with a non-empty third one.
# Any non-empty value enables it (including "0" or "false").
real_run = len(sys.argv) == 4 and bool(sys.argv[3])

# Per-letter layout: ./<series>/<year>/{1-in, 2-preprocess, 3-wavs}
fn = f"./{letter_series}/{letter_year}/1-in/bpl-{letter_year}-letter.txt"
pre_fn = f"./{letter_series}/{letter_year}/2-preprocess/bpl-{letter_year}.txt"
output_dir = f"./{letter_series}/{letter_year}/3-wavs"

# Equivalent of `mkdir -p`, without spawning a process; unlike the unchecked
# subprocess call this raises OSError if the directory cannot be created.
os.makedirs(output_dir, exist_ok=True)

if not real_run:
    print("Dry run, check that sentences are readable.")
|
|
|
|
|
|
|
|
|
|
def synth_sentence(sentence, i):
    """Request speech for one sentence from the TTS server and save it as a WAV.

    The sentence is percent-encoded, placed in the ``text`` query parameter of
    the server's ``/api/tts`` endpoint, and fetched with ``curl``. The response
    body is written to ``output-NNN.wav`` inside ``output_dir``.

    Args:
        sentence: Text to synthesize.
        i: Index of the sentence; zero-padded to three digits in the file name.
    """
    text_prompt = quote(sentence)
    # If hosted on IPv6 use http://[::1]:5002
    # If hosted on IPv4 use http://localhost:5002
    # Raw f-string: the backslashes are passed to curl literally. In a non-raw
    # literal "\[" is an invalid escape sequence (SyntaxWarning on 3.12+).
    # NOTE(review): presumably the backslashes keep curl's URL globbing from
    # interpreting the brackets - confirm against the curl manual.
    query_string = rf"http://\[::1\]:5002/api/tts?text={text_prompt}&style_wav=&language_id=&speaker_id={speaker_id}"
    print(f"Query string {query_string}")
    # curl's exit status is not checked, so a failed request does not stop the run.
    subprocess.run(['curl', query_string, '-o', f"./{output_dir}/output-{str(i).zfill(3)}.wav"])
|
|
|
|
|
|
|
|
|
|
# A sentence boundary is either a word of two or more characters followed by
# "." or ":" and one whitespace character, or a run of two or more whitespace
# characters (e.g. a blank line).
_SENTENCE_END = re.compile(r"(\w\w+([\.:][\s\n])|([\s\n][\s\n]+))")


def _split_sentences(text):
    """Split *text* into cleaned sentences.

    Returns a list of ``(sentence, end_match)`` pairs in document order.
    ``end_match`` is the regex match that terminated the sentence, or ``None``
    for text left over after the last boundary. Sentences that are empty after
    cleaning are omitted.
    """
    pieces = []
    start = 0
    for end_match in _SENTENCE_END.finditer(text):
        # match.end() is already one past the boundary, so no +1 is needed.
        pieces.append((text[start:end_match.end()], end_match))
        start = end_match.end()
    # Keep whatever follows the final boundary (e.g. a last sentence with no
    # trailing whitespace); previously this text was silently dropped.
    pieces.append((text[start:], None))

    result = []
    for raw, end_match in pieces:
        # Heal any newlines due to awkward breaks from copy-paste.
        sentence = " ".join(part.strip() for part in raw.strip().split("\n"))
        if sentence:  # skip boundaries that produced no text
            result.append((sentence, end_match))
    return result


with open(fn) as f:
    all_text = f.read()

# The earlier split('.') pass is gone: it called curl unconditionally (even in
# a dry run) and wrote the same output-NNN.wav names this pass produces.
sentences = _split_sentences(all_text)
count = len(sentences)
print(f"{count} sentences found.\n")

for i, (sentence, sentence_end) in enumerate(sentences):
    print(f"Sentence {i}:\n\t\"{sentence}\"\n\t{sentence_end}")
    if real_run:
        synth_sentence(sentence, i)
|
|
|
|
|
|