import requests
from urllib.parse import quote
import subprocess
import os
import sys
import re

# Speaker identifier sent to the TTS server with every request.
speaker_id = "p230"

# Literal text replacements applied to every line before sentence splitting
# (key = text to find, value = replacement).
SUBSTITUTIONS = {
    " - ": "--",
    "Ave.": "Avenue,",
    "Co.": "Company",
}

DEFAULT_LETTER_SERIES = "buffett-partnership"
DEFAULT_LETTER_YEAR = "1956"


def parse_cli_args(argv):
    """Extract the script settings from an argv-style list.

    Positional arguments (all optional; a missing or empty argument falls
    back to its default):

        argv[1]  letter series          (default "buffett-partnership")
        argv[2]  letter year            (default "1956")
        argv[3]  file-name stem         (default "<series>-<year>")
        argv[4]  real-run switch        (any non-empty value enables it)

    Args:
        argv: Sequence of strings laid out like ``sys.argv`` (program name
            first).

    Returns:
        A ``(letter_series, letter_year, fn_stem, real_run)`` tuple where the
        first three items are strings and ``real_run`` is a bool.
    """
    def arg_or(index, default):
        # Guard the index so omitted trailing arguments use the default
        # instead of raising IndexError.
        return argv[index] if len(argv) > index and argv[index] else default

    series = arg_or(1, DEFAULT_LETTER_SERIES)
    year = arg_or(2, DEFAULT_LETTER_YEAR)
    # Keep the stem as the string that was passed; it is interpolated into
    # file names below.
    stem = arg_or(3, f"{series}-{year}")
    live = bool(arg_or(4, ""))
    return series, year, stem, live


letter_series, letter_year, fn_stem, real_run = parse_cli_args(sys.argv)

fn = f"./{letter_series}/{letter_year}/1-in/{fn_stem}-letter.txt"
pre_fn = f"./{letter_series}/{letter_year}/2-preprocess/{fn_stem}.txt"

output_dir = f"./{letter_series}/{letter_year}/3-wavs"

try:
    os.makedirs(output_dir, exist_ok=True)
except OSError as err:
    # Stay non-fatal, as the previous unchecked `mkdir -p` call was: a dry
    # run never writes into this directory.
    print(f"Warning: could not create {output_dir}: {err}", file=sys.stderr)

if not real_run:
    print("Dry run, check that sentences are readable.")

def synth_sentence(sentence, i):
    """Fetch synthesized speech for one sentence and save it as a WAV file.

    Builds a ``/api/tts`` URL from the URL-quoted sentence and the
    module-level ``speaker_id``, prints it, and invokes ``curl`` to download
    the response to ``output-NNN.wav`` under the module-level ``output_dir``
    (``NNN`` is ``i`` zero-padded to three characters).

    Args:
        sentence: Text to synthesize.
        i: Sentence index used to number the output file.
    """
    text_prompt = quote(sentence)
    # If hosted on IPv6 use http://[::1]:5002
    # If hosted on IPv4 use http://localhost:5002
    # The brackets are sent to curl with a literal backslash in front of each
    # (presumably so curl's URL globbing leaves the IPv6 literal alone --
    # confirm against the curl manual). They are spelled "\\[" / "\\]" because
    # "\[" is an invalid escape sequence in a non-raw Python string.
    query_string = f"http://\\[::1\\]:5002/api/tts?text={text_prompt}&style_wav=&language_id=&speaker_id={speaker_id}"
    print(f"Query string {query_string}")
    result = subprocess.run(['curl', query_string, '-o', f"./{output_dir}/output-{str(i).zfill(3)}.wav"])
    if result.returncode != 0:
        # Report the failure instead of dropping it silently; keep going so
        # one bad sentence does not abort the whole run.
        print(f"curl exited with status {result.returncode} for sentence {i}", file=sys.stderr)

# Read the whole letter up front; the file handle is not needed afterwards.
with open(fn) as f:
    lines = f.readlines()

skip_flag = False          # currently inside a [SKIP] ... [/SKIP] region
table_flag = False         # currently inside a [TABLE] ... [/TABLE] region
table_header_flag = False  # header row of the current table already consumed
table_headers = []

# Do substitutions first, because often they affect sentence splitting
for i, raw_line in enumerate(lines):
    marker = raw_line.strip()

    # [SKIP] regions: blank both marker lines and everything between them.
    # Blanked lines bypass table parsing so they cannot be mistaken for a
    # table header or a malformed table row.
    if skip_flag:
        if marker == "[/SKIP]":
            skip_flag = False
        lines[i] = ""  # Empty lines are pruned below
        continue
    if marker == "[SKIP]":
        skip_flag = True
        lines[i] = ""
        continue

    line = raw_line
    if table_flag:
        if not table_header_flag:
            # First line after [TABLE] is the header row: " & "-separated
            # column names, with periods removed.
            table_header_flag = True
            table_headers = [header.replace(".", "") for header in marker.split(" & ")]
            line = ""
            print(table_headers)
        elif marker == "[/TABLE]":
            table_flag = False
            table_header_flag = False
            line = ""  # Empty lines are pruned below
        else:
            # Process normal table row: single-space-separated values, one
            # per header.
            table_row = marker.split(" ")

            if len(table_row) != len(table_headers):
                print((f"Line {i}: Malformed table, table body row had {len(table_row)} fields, "
                       f"but table header row had {len(table_headers)} fields."))
                sys.exit(1)

            # Pair each value with its header; the trailing " ;\n" is one of
            # the sentence terminators matched below.
            line = "; ".join(
                f"{header} {value}"
                for (header, value) in zip(table_headers, table_row)
            ) + " ;\n"
            print(line)
    elif marker == "[TABLE]":
        table_flag = True
        line = ""  # Empty lines are pruned below

    for (orig, subst) in SUBSTITUTIONS.items():
        line = line.replace(orig, subst)
    lines[i] = line

all_text = ''.join(lines)
sentence_ends = list(re.finditer(r"(\w\w+([\.:;][\s\n])|([\s\n][\s\n]+)|(\s;\n))", all_text))

# Cut the text at every detected sentence end, remembering what ended each
# piece so it can be shown in the dry-run listing.
segments = []
start = 0
for sentence_end in sentence_ends:
    segments.append((all_text[start:sentence_end.end()], sentence_end))
    start = sentence_end.end()  # no +1 needed since end is already one past the real ending character

# Text after the last terminator (e.g. a closing line without a final period
# or blank line) would otherwise be dropped silently.
if all_text[start:].strip():
    segments.append((all_text[start:], "<unterminated text at end of input>"))

count = len(segments)
print(f"{count} sentences found.\n")

for (i, (segment, ending)) in enumerate(segments):
    # Heal any newlines due to awkward breaks from copy-pasta
    sentence = " ".join(part.strip() for part in segment.strip().split("\n"))
    print(f"Sentence {i}:\n\t\"{sentence}\"\n\t{ending}")

    if not sentence.strip():
        print(f"Empty sentence {i} made it through somehow. Continuing...")
        continue

    if real_run:
        synth_sentence(sentence, i)