diff --git a/clean_html.py b/clean_html.py
new file mode 100644
index 0000000..0c377bd
--- /dev/null
+++ b/clean_html.py
@@ -0,0 +1,43 @@
+#!/usr/bin/env python3
+import sys
+
+fn = sys.argv[1]  # first CLI argument: path of the HTML file to read
+out_fn = sys.argv[2]  # second CLI argument: path the stripped text is written to
+
+from bs4 import BeautifulSoup
+
+def strip_html_tags(html):
+    """Return the text content of *html* with <script> elements removed.
+
+    The markup is parsed with BeautifulSoup's built-in 'html.parser'; the
+    extracted text has leading and trailing whitespace stripped.
+    """
+    document = BeautifulSoup(html, 'html.parser')
+
+    # Detach every <script> element so its contents are not part of the text.
+    for script_tag in document.find_all('script'):
+        script_tag.extract()
+
+    plain_text = document.get_text()
+    return plain_text.strip()
+
+def read_file(file_path):
+    """Return the contents of *file_path* decoded as UTF-8.
+
+    Bytes that are not valid UTF-8 are dropped instead of raising. The
+    result matches deleting one offending byte at a time and retrying,
+    but is produced in a single pass rather than re-decoding and copying
+    the whole buffer once per bad byte.
+    """
+    with open(file_path, 'rb') as file:
+        byte_sequence = file.read()
+
+    return byte_sequence.decode('utf-8', errors='ignore')
+
+# Convert the HTML file to plain text and write the result to out_fn.
+# read_file() opens fn itself, so no separate handle is opened here.
+html_content = read_file(fn)
+stripped_text = strip_html_tags(html_content)
+with open(out_fn, "w") as of:
+    # No explicit flush: leaving the with-block flushes and closes the file.
+    of.write(stripped_text)
diff --git a/speak.py b/speak.py
index d7e940c..2dfcdfc 100644
--- a/speak.py
+++ b/speak.py
@@ -19,7 +19,7 @@ fn_stem = (len(sys.argv) == 4) and bool(sys.argv[3]) or f"{letter_series}-{lette
 real_run = (len(sys.argv) == 5) and bool(sys.argv[4]) or False
 
 fn = f"./{letter_series}/{letter_year}/1-in/{fn_stem}-letter.txt"
-pre_fn = f"./{letter_series}/{letter_year}/2-preprocess/{fn_stem}.txt"
+pre_fn = f"./{letter_series}/{letter_year}/2-preprocessed/{fn_stem}-letter.txt"
 
 output_dir = f"./{letter_series}/{letter_year}/3-wavs"
 
@@ -36,7 +36,7 @@ def synth_sentence(sentence, i):
     print(f"Query string {query_string}")
     subprocess.run(['curl', query_string, '-o', f"./{output_dir}/output-{str(i).zfill(3)}.wav"])
 
-with open(fn) as f:
+with open(pre_fn) as f:
     lines = f.readlines()
 
     skip_flag = False