|
|
|
|
import requests
|
|
|
|
|
from urllib.parse import quote
|
|
|
|
|
import subprocess
|
|
|
|
|
import os
|
|
|
|
|
import sys
|
|
|
|
|
import re
|
|
|
|
|
|
|
|
|
|
# Voice identifier sent to the TTS server as the `speaker_id` query parameter.
speaker_id = "p230"

# Literal text replacements applied to every input line (via str.replace, in
# the order listed here) before the text is split into sentences.
SUBSTITUTIONS = {
    " - ": "--",
    "Ave.": "Avenue,",
    "Co.": "Company",
}
# Command line: <letter_series> <letter_year> [fn_stem] [real_run]
# A missing or empty argument falls back to the default shown on its line.
letter_series = (
    sys.argv[1] if len(sys.argv) > 1 and sys.argv[1] else "buffett-partnership"
)
letter_year = sys.argv[2] if len(sys.argv) > 2 and sys.argv[2] else "1956"
# Keep the argument's text itself (not its truthiness) as the file stem, and
# honour it even when a fourth argument (real_run) follows.
fn_stem = (
    sys.argv[3]
    if len(sys.argv) > 3 and sys.argv[3]
    else f"{letter_series}-{letter_year}"
)
# Any non-empty fourth argument switches from a dry run to real synthesis.
real_run = len(sys.argv) > 4 and bool(sys.argv[4])

# Raw input path; not referenced again in the visible part of this script.
fn = f"./{letter_series}/{letter_year}/1-in/{fn_stem}-letter.txt"
# Preprocessed text that is actually read and split into sentences.
pre_fn = f"./{letter_series}/{letter_year}/2-preprocessed/{fn_stem}-letter.txt"

# Directory that receives one WAV file per synthesized sentence.
output_dir = f"./{letter_series}/{letter_year}/3-wavs"

# Equivalent of `mkdir -p`; a failure is reported but not fatal, so a dry run
# can still proceed without a writable output directory.
try:
    os.makedirs(output_dir, exist_ok=True)
except OSError as err:
    print(f"Could not create output directory {output_dir}: {err}")

if not real_run:
    print("Dry run, check that sentences are readable.")
def synth_sentence(sentence, i):
    """Fetch synthesized speech for one sentence and save it as a WAV file.

    The sentence is percent-encoded, placed in the ``text`` query parameter of
    the ``/api/tts`` endpoint, and requested with ``curl``; the response is
    written to ``output-NNN.wav`` inside ``output_dir``, where ``NNN`` is ``i``
    zero-padded to three digits.

    Args:
        sentence: Text to synthesize.
        i: Sentence index used to number the output file.
    """
    text_prompt = quote(sentence)
    # If hosted on IPv6 use http://[::1]:5002
    # If hosted on IPv4 use http://localhost:5002
    # The URL deliberately contains literal backslashes before the brackets
    # (presumably so curl does not treat them as a glob range -- confirm).
    # They are written as "\\" because "\[" is an invalid escape sequence.
    query_string = (
        f"http://\\[::1\\]:5002/api/tts?text={text_prompt}"
        f"&style_wav=&language_id=&speaker_id={speaker_id}"
    )
    print(f"Query string {query_string}")
    subprocess.run(['curl', query_string, '-o', f"./{output_dir}/output-{str(i).zfill(3)}.wav"])
# Load the preprocessed letter; readlines() keeps each line's trailing newline.
with open(pre_fn) as f:
    lines = f.readlines()

# State for the marker-driven pass over `lines`.
skip_flag = False          # True while inside a [SKIP] ... [/SKIP] region
table_flag = False         # True while inside a [TABLE] ... [/TABLE] region
table_header_flag = False  # True once the current table's header row was read
table_headers = []         # Column names of the current table
# Do substitutions first, because often they affect sentence splitting
for i in range(len(lines)):
    # [SKIP] ... [/SKIP]: blank the markers and everything between them.
    stripped = lines[i].strip()
    if not skip_flag and stripped == "[SKIP]":
        skip_flag = True
        lines[i] = ""
    elif skip_flag:
        if stripped == "[/SKIP]":
            skip_flag = False
        # Blank skipped content and the closing marker itself, so the marker
        # text is never handed to the synthesizer.
        lines[i] = ""

    # [TABLE] ... [/TABLE]: the first line is a " & "-separated header row;
    # each following line is a space-separated row that is rewritten as
    # "header value; header value ;" so it can be read aloud.
    stripped = lines[i].strip()  # re-read: skip handling may have blanked it
    if not table_flag and stripped == "[TABLE]":
        table_flag = True
        lines[i] = ""  # Empty lines are pruned below
    elif table_flag:
        if not table_header_flag:
            table_header_flag = True
            table_headers = [header.replace(".", "") for header in stripped.split(" & ")]
            lines[i] = ""
            print(table_headers)
        elif stripped == "[/TABLE]":
            table_flag = False
            table_header_flag = False
            lines[i] = ""  # Empty lines are pruned below
        else:
            # Process normal table row
            table_row = stripped.split(" ")

            if len(table_row) != len(table_headers):
                print((f"Line {i}: Malformed table, table body row had {len(table_row)} fields, "
                       f"but table header row had {len(table_headers)} fields."))
                # sys.exit rather than the site-provided exit(), which is only
                # meant for the interactive interpreter.
                sys.exit(1)

            processed_row = [
                f"{header} {value}"
                for (header, value) in zip(table_headers, table_row)
            ]
            # The trailing " ;\n" is one of the sentence terminators matched
            # by the splitting regex later on.
            lines[i] = "; ".join(processed_row) + " ;\n"
            print(lines[i])

    for (orig, subst) in SUBSTITUTIONS.items():
        lines[i] = lines[i].replace(orig, subst)
all_text = ''.join(lines)
# A sentence ends at any of:
#   * a word of two or more characters followed by '.', ':' or ';' and then
#     whitespace (one-character tokens such as "W." do not match),
#   * a run of two or more whitespace characters,
#   * the " ;\n" written after each table row.
sentence_ends = list(re.finditer(r"(\w\w+([\.:;][\s\n])|([\s\n][\s\n]+)|(\s;\n))", all_text))

# (end offset, regex match or None) for every sentence to emit.
boundaries = [(match.end(), match) for match in sentence_ends]
# Text after the last terminator (e.g. a final sentence ending in "?" or a
# file without a trailing newline) would otherwise be dropped silently, so
# treat it as one more sentence.
tail_start = sentence_ends[-1].end() if sentence_ends else 0
if all_text[tail_start:].strip():
    boundaries.append((len(all_text), None))

start = 0
count = len(boundaries)
print(f"{count} sentences found.\n")

for (i, (end, sentence_end)) in enumerate(boundaries):
    sentence = all_text[start:end].strip()
    # Heal any newlines due to awkward breaks from copy-pasta
    sentence = " ".join(part.strip() for part in sentence.split("\n"))
    terminator = sentence_end if sentence_end is not None else "<end of text>"
    print(f"Sentence {i}:\n\t\"{sentence}\"\n\t{terminator}")
    start = end  # no +1 needed since end is already one past the real ending character

    if not sentence.strip():
        print(f"Empty sentence {i} made it through somehow. Continuing...")
        continue

    # In a dry run the sentences are only printed for review.
    if real_run:
        synth_sentence(sentence, i)