parent
86b4e9c0e0
commit
c42199eaad
@ -0,0 +1,2 @@
|
|||||||
|
rnn.raw
|
||||||
|
temp.raw
|
||||||
@ -0,0 +1,39 @@
|
|||||||
|
|
||||||
|
## Index of Berkshire Annual Meeting Transcripts
|
||||||
|
|
||||||
|
From Warren Buffett archive at CNBC.
|
||||||
|
|
||||||
|
* https://buffett.cnbc.com/video/1994/04/25/morning-session---1994-berkshire-hathaway-annual-meeting.html
|
||||||
|
|
||||||
|
# HOWTO: Create a Training Set of GOAT Warren Buffett for Mozilla TTS
|
||||||
|
|
||||||
|
Following https://stackoverflow.com/a/66307612
|
||||||
|
|
||||||
|
Use the `LiveRecorder` Firefox plugin to record from YouTube.
|
||||||
|
|
||||||
|
Convert from webm to audio-only ogg.
|
||||||
|
```
|
||||||
|
ffmpeg -i berkshire-1994.webm -vn -acodec copy ./berkshire-1994-00:00_10:00.ogg
|
||||||
|
```
|
||||||
|
|
||||||
|
Split by silence. This automated splitting of files is the killer app for command-line audio tools, imo.
|
||||||
|
|
||||||
|
```
|
||||||
|
sox ./berkshire-1994-00:00_10:00.ogg berkshire-1994-00:00_10:00-.wav silence 1 0.2 0.5% 1 0.2 0.5% : newfile : restart
|
||||||
|
```
|
||||||
|
|
||||||
|
Play with trimming (start and duration)
|
||||||
|
then, once you have narrowed down the clip, use `sox` to apply the same trim.
|
||||||
|
This is where GUIs like Audacity are still better.
|
||||||
|
|
||||||
|
```
|
||||||
|
play ./berkshire-1994-00:00_10:00-149.wav trim 0:00 0:02.5
|
||||||
|
sox ./berkshire-1994-00:00_10:00-149.wav ./berkshire-1994-00:00_10:00-149a.wav trim 0:00 0:02.5
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
https://stackoverflow.com/questions/9667081/how-do-you-trim-the-audio-files-end-using-sox
|
||||||
|
|
||||||
|
https://unix.stackexchange.com/questions/381890/play-audio-file-from-a-certain-time-step-in-terminal
|
||||||
|
|
||||||
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,49 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import os
|
||||||
|
import subprocess
|
||||||
|
import soundfile as sf
|
||||||
|
import pyloudnorm as pyln
|
||||||
|
import sys
|
||||||
|
|
||||||
|
def target_path_for(filepath):
    """Return the output path that corresponds to *filepath*.

    The output path is the input path with every occurrence of the substring
    "original" replaced by "converted".

    Raises:
        ValueError: if the replacement leaves the path unchanged, because
            converting would then overwrite the source file.
    """
    target_filepath = Path(str(filepath).replace("original", "converted"))
    if str(filepath) == str(target_filepath):
        raise ValueError("Source and target path are identical: " + str(target_filepath))
    return target_filepath


def convert_file(filepath, target_filepath, rnn):
    """Denoise, band-limit, resample and loudness-normalize one WAV file.

    Args:
        filepath: path of the source WAV file.
        target_filepath: path the processed WAV file is written to; its
            parent directory is created if missing.
        rnn: path of the rnnoise demo executable.

    Raises:
        subprocess.CalledProcessError: if any external command exits non-zero.

    The scratch files "48k.wav", "temp.raw", "rnn.raw" and "rnn.wav" are
    written to the current working directory and reused for every file.
    """
    # check=True on every stage: the scratch files have fixed names, so a
    # failed stage would otherwise let the next stage silently reuse the
    # scratch data left behind by the previously processed file.

    # Stereo to Mono; upsample to 48000Hz
    subprocess.run(["sox", str(filepath), "48k.wav", "remix", "-", "rate", "48000"], check=True)
    # convert wav to raw (16-bit signed, mono, 48 kHz)
    subprocess.run(
        ["sox", "48k.wav", "-c", "1", "-r", "48000", "-b", "16", "-e", "signed-integer", "-t", "raw", "temp.raw"],
        check=True,
    )
    # apply rnnoise
    subprocess.run([rnn, "temp.raw", "rnn.raw"], check=True)
    # convert raw back to wav
    subprocess.run(
        ["sox", "-r", "48k", "-b", "16", "-e", "signed-integer", "rnn.raw", "-t", "wav", "rnn.wav"],
        check=True,
    )

    target_filepath.parent.mkdir(parents=True, exist_ok=True)
    # apply high/low pass filter and change sr to 22050Hz
    subprocess.run(
        ["sox", "rnn.wav", str(target_filepath), "remix", "-", "highpass", "100", "lowpass", "7000", "rate", "22050"],
        check=True,
    )

    data, rate = sf.read(str(target_filepath))

    # measure the loudness first
    # NOTE(review): pyloudnorm is believed to reject clips shorter than its
    # gating block (about 0.4 s) -- confirm how very short clips should be handled.
    meter = pyln.Meter(rate)  # create BS.1770 meter
    loudness = meter.integrated_loudness(data)

    # loudness normalize audio to -25 dB LUFS
    loudness_normalized_audio = pyln.normalize.loudness(data, loudness, -25.0)

    # Write back with the rate that was actually read instead of repeating
    # the 22050 literal from the sox command above.
    sf.write(str(target_filepath), data=loudness_normalized_audio, samplerate=rate)


def main(argv=None):
    """Convert every ``*.wav`` file found below the directory given in *argv*.

    Args:
        argv: command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. Only the first argument (the source directory)
            is used.

    Returns:
        0 on success, 2 when the source directory argument is missing.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        print("usage: " + Path(sys.argv[0]).name + " SOURCE_DIR", file=sys.stderr)
        return 2

    src = args[0]
    rnn = str(Path.home() / "src" / "rnnoise" / "examples" / "rnnoise_demo")

    # Snapshot the matches before processing: the loop itself creates .wav
    # files, and a lazily evaluated glob could pick those up mid-iteration.
    paths = sorted(Path(src).glob("**/*.wav"))

    for filepath in paths:
        target_filepath = target_path_for(filepath)

        print("From: " + str(filepath))
        print("To: " + str(target_filepath))

        convert_file(filepath, target_filepath, rnn)

        print("")

    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue