You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

62 lines
2.0 KiB

import requests
from urllib.parse import quote
import subprocess
import os
import sys
import re
# Voice sent to the TTS server as the ``speaker_id`` query parameter.
speaker_id = "p230"

# Literal text replacements applied to the letter before sentence splitting.
SUBSTITUTIONS = {
    " - ": "--",
    "Ave.": "Avenue,",
    "Co.": "Company",
}


def _cli_arg(position, default):
    """Return ``sys.argv[position]``, or *default* if it is missing or empty."""
    # Indexing sys.argv directly raises IndexError when the argument is
    # omitted, which would make the defaults unreachable.
    if len(sys.argv) > position and sys.argv[position]:
        return sys.argv[position]
    return default


# Usage: script [letter_series] [letter_year] [real_run]
letter_series = _cli_arg(1, "buffett-partnership")
letter_year = _cli_arg(2, "1956")
# Any non-empty third argument switches from a dry run to real synthesis.
real_run = len(sys.argv) == 4 and bool(sys.argv[3])

fn = f"./{letter_series}/{letter_year}/1-in/bpl-{letter_year}-letter.txt"
pre_fn = f"./{letter_series}/{letter_year}/2-preprocess/bpl-{letter_year}.txt"
output_dir = f"./{letter_series}/{letter_year}/3-wavs"

# Create the output directory (and parents) without shelling out to mkdir;
# unlike the ignored subprocess status, a failure here raises immediately.
os.makedirs(output_dir, exist_ok=True)

if not real_run:
    print("Dry run, check that sentences are readable.")
def synth_sentence(sentence, i):
    """Request speech for one sentence and save it as a WAV file.

    The sentence is percent-encoded, embedded in a GET URL for the TTS
    endpoint on the loopback address, and fetched by running ``curl``.
    The exit status of ``curl`` is not checked.

    Args:
        sentence: Text to synthesize.
        i: Sentence index; zero-padded to three digits in the output file
            name (e.g. ``output-007.wav``) inside ``output_dir``.
    """
    text_prompt = quote(sentence)
    # If hosted on IPv6 use http://[::1]:5002
    # If hosted on IPv4 use http://localhost:5002
    # The backslashes are part of the URL handed to curl (presumably so its
    # URL globbing does not interpret the brackets). They are written as
    # "\\" because "\[" is an invalid escape sequence in a Python string.
    query_string = (
        f"http://\\[::1\\]:5002/api/tts?text={text_prompt}"
        f"&style_wav=&language_id=&speaker_id={speaker_id}"
    )
    print(f"Query string {query_string}")
    wav_path = f"./{output_dir}/output-{str(i).zfill(3)}.wav"
    subprocess.run(['curl', query_string, '-o', wav_path])
# Read the whole letter into memory.
with open(fn) as f:
    all_text = f.read()

# Do substitutions first, because often they affect sentence splitting
for orig, subst in SUBSTITUTIONS.items():
    all_text = all_text.replace(orig, subst)

# A boundary is either two or more word characters followed by '.', ':' or
# ';' and one whitespace character, or a run of two or more whitespace
# characters (e.g. the blank line after a heading).
sentence_ends = list(re.finditer(r"(\w\w+([\.:;][\s\n])|([\s\n][\s\n]+))", all_text))

# Cut the text at each boundary, then add whatever follows the last boundary
# so a final sentence without a recognised terminator is not lost.
segments = []
start = 0
for sentence_end in sentence_ends:
    segments.append((all_text[start:sentence_end.end()], sentence_end))
    start = sentence_end.end()  # no +1 needed since end is already one past the real ending character
segments.append((all_text[start:], None))

sentences = []
for raw, sentence_end in segments:
    # Heal any newlines due to awkward breaks from copy-pasta
    sentence = " ".join(part.strip() for part in raw.strip().split("\n"))
    # Skip whitespace-only segments; they would otherwise be sent to the
    # TTS server as an empty prompt.
    if sentence:
        sentences.append((sentence, sentence_end))

count = len(sentences)
print(f"{count} sentences found.\n")
for i, (sentence, sentence_end) in enumerate(sentences):
    boundary = sentence_end if sentence_end is not None else "<end of text>"
    print(f"Sentence {i}:\n\t\"{sentence}\"\n\t{boundary}")
    if real_run:
        synth_sentence(sentence, i)