From 02e4dd662a443b511f37112bb08a291356241ef8 Mon Sep 17 00:00:00 2001 From: Pablo Diaz Date: Mon, 9 Jan 2023 17:44:32 -0400 Subject: [PATCH 1/3] feature #1 ulaw->wav_logpcm --- README.md | 18 +++ docs/format_table.md | 2 +- examples/ulaw2wav_logpcm/main.go | 75 ++++++++++ examples/{ulaw2wav => ulaw2wav_lpcm}/main.go | 6 +- pkg/audioconfig/wav.go | 8 + pkg/encoding/encoding.go | 1 + pkg/transcoder/models_transcoder.go | 2 +- pkg/transcoder/mulaw2wav.go | 4 +- pkg/transcoder/mulaw2wavlogpcm.go | 146 +++++++++++++++++++ pkg/transcoder/router.go | 28 +++- pkg/transcoder/transcoding_general.go | 8 +- 11 files changed, 286 insertions(+), 12 deletions(-) create mode 100644 examples/ulaw2wav_logpcm/main.go rename examples/{ulaw2wav => ulaw2wav_lpcm}/main.go (95%) create mode 100644 pkg/transcoder/mulaw2wavlogpcm.go diff --git a/README.md b/README.md index 53d07a1..7f77b8a 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,14 @@ Sox is a great tool, but it's not easy to use. SoPro is a next generation sound processing tool that is easy to use and easy to extend. By now only audio files can be converted to other formats, but in the future more features will be added, like video processing, etc. +Functionalities: + +## Installation + +```bash +go get -v github.com/pablodz/sopro +``` + ## Methods planned to be implemented - [x] Chunked @@ -11,8 +19,18 @@ Sox is a great tool, but it's not easy to use. SoPro is a next generation sound - [ ] Batch - [ ] Streaming +## Examples + +Check [./examples](./examples/) folder + ## Roadmap +- [ ] CLI (sox-friendly) +- [ ] GUI (in another repo) +- [ ] Microservice (in another repo) + - [ ] HTTP + - [ ] Websocket + - [ ] gRPC - [x] Audio file conversion - [ ] Format conversion [Work in progress...](docs/format_table.md) - [ ] Bitrate conversion diff --git a/docs/format_table.md b/docs/format_table.md index 9bb2ea1..5249a30 100644 --- a/docs/format_table.md +++ b/docs/format_table.md @@ -37,7 +37,7 @@ blockquote { | mogg | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | + | | | | | | | | | | | | | | | | | | ulaw | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | + | | | | | | | | | | | + | | | | | | opus | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | + | | | | | | | | | | | | | | | -| pcm | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | + | | | | | | | | | | | | | | +| pcm | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | + | | | | | | | | | + | | | | | | ra | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | + | | | | | | | | | | | | | | rm | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | + | | | | | | | | | | | | | raw | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | + | | | | | | | | | | | diff --git a/examples/ulaw2wav_logpcm/main.go b/examples/ulaw2wav_logpcm/main.go new file mode 100644 index 0000000..7fe7921 --- /dev/null +++ b/examples/ulaw2wav_logpcm/main.go @@ -0,0 +1,75 @@ +package main + +import ( + "os" + + "github.com/pablodz/sopro/pkg/audioconfig" + "github.com/pablodz/sopro/pkg/cpuarch" + "github.com/pablodz/sopro/pkg/encoding" + "github.com/pablodz/sopro/pkg/fileformat" + "github.com/pablodz/sopro/pkg/method" + "github.com/pablodz/sopro/pkg/transcoder" +) + +func main() { + + // Open the input file + in, err := os.Open("./internal/samples/recording.ulaw") + if err != nil { + panic(err) + } + defer in.Close() + + // Create the output file + out, err := os.Create("./internal/samples/result_sample_ulaw_mono_8000_le_logpcm.wav") + if err != nil { + panic(err) + } + defer out.Close() + + // create a transcoder + t := &transcoder.Transcoder{ + Method: method.BIT_TABLE, + SourceConfigs: transcoder.TranscoderAudioConfig{ + Endianness: cpuarch.LITTLE_ENDIAN, + }, + TargetConfigs: transcoder.TranscoderAudioConfig{ + Endianness: cpuarch.LITTLE_ENDIAN, + }, + SizeBuffer: 1024, + Verbose: true, + } + + // Transcode the file + err = t.Mulaw2Wav( + &transcoder.AudioFileIn{ + Data: in, + AudioFileGeneral: transcoder.AudioFileGeneral{ + Format: fileformat.AUDIO_MULAW, + Config: audioconfig.MulawConfig{ + BitDepth: 8, + Channels: 1, + Encoding: encoding.SPACE_LOGARITHMIC, // ulaw is logarithmic + SampleRate: 8000, + }, + }, + }, + &transcoder.AudioFileOut{ + Data: out, + AudioFileGeneral: transcoder.AudioFileGeneral{ + Format: fileformat.AUDIO_WAV, + Config: audioconfig.WavConfig{ + BitDepth: 8, + Channels: 1, + Encoding: encoding.SPACE_LOGARITHMIC, + SampleRate: 8000, + }, + }, + }, + ) + + if err != nil { + panic(err) + } + +} diff --git a/examples/ulaw2wav/main.go b/examples/ulaw2wav_lpcm/main.go similarity index 95% rename from examples/ulaw2wav/main.go rename to examples/ulaw2wav_lpcm/main.go index a910ca6..accd527 100644 --- a/examples/ulaw2wav/main.go +++ b/examples/ulaw2wav_lpcm/main.go @@ -21,7 +21,7 @@ func main() { defer in.Close() // Create the output file - out, err := os.Create("./internal/samples/result_sample_ulaw_mono_8000_be.wav") + out, err := os.Create("./internal/samples/result_sample_ulaw_mono_8000_le_lpcm.wav") if err != nil { panic(err) } @@ -36,8 +36,8 @@ func main() { TargetConfigs: transcoder.TranscoderAudioConfig{ Endianness: cpuarch.LITTLE_ENDIAN, }, - SizeBufferToProcess: 1024, - Verbose: true, + SizeBuffer: 1024, + Verbose: true, } // Transcode the file diff --git a/pkg/audioconfig/wav.go b/pkg/audioconfig/wav.go index 2f830f5..a049920 100644 --- a/pkg/audioconfig/wav.go +++ b/pkg/audioconfig/wav.go @@ -13,6 +13,14 @@ type WavConfig struct { Encoding int // the encoding format (e.g. "PCM", "IEEE_FLOAT") } +// https://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html +const ( + WAVE_FORMAT_PCM = 0x0001 + WAVE_FORMAT_IEEE_FLOAT = 0x0003 + WAVE_FORMAT_ALAW = 0x0006 + WAVE_FORMAT_MULAW = 0x0007 +) + func PrintWavHeaders(headersWav []byte) { if len(headersWav) != 44 { log.Println("[ERROR] Headers are not 44 bytes long") diff --git a/pkg/encoding/encoding.go b/pkg/encoding/encoding.go index 6941d69..ad9ffbf 100644 --- a/pkg/encoding/encoding.go +++ b/pkg/encoding/encoding.go @@ -7,6 +7,7 @@ const ( ) var ENCODINGS = map[int]string{ + NOT_FILLED: "Not filled", SPACE_LINEAR: "Linear", SPACE_LOGARITHMIC: "Logarithmic", } diff --git a/pkg/transcoder/models_transcoder.go b/pkg/transcoder/models_transcoder.go index 2a2a2fb..ade4ecf 100644 --- a/pkg/transcoder/models_transcoder.go +++ b/pkg/transcoder/models_transcoder.go @@ -3,7 +3,7 @@ package transcoder type Transcoder struct { Method int // the method of transcoding (e.g. 1, 2, 3, etc.) MethodAdvancedConfigs interface{} // the specific configuration options for the transcoding method - SizeBufferToProcess int // the size of the buffer to read from the input file. Default is 1024 + SizeBuffer int // the size of the buffer to read from the input file. Default is 1024 SourceConfigs TranscoderAudioConfig // the source configuration TargetConfigs TranscoderAudioConfig // the target configuration BitDepth int // the bit depth (e.g. 8, 16, 24) Needs to be equal for source and target diff --git a/pkg/transcoder/mulaw2wav.go b/pkg/transcoder/mulaw2wav.go index f7bb6e5..7c80de5 100644 --- a/pkg/transcoder/mulaw2wav.go +++ b/pkg/transcoder/mulaw2wav.go @@ -31,7 +31,7 @@ func init() { // Transcode an ulaw file to a wav file (large files supported) // https://raw.githubusercontent.com/corkami/pics/master/binary/WAV.png // http://www.topherlee.com/software/pcm-tut-wavformat.html -func mulaw2Wav(in *AudioFileIn, out *AudioFileOut, transcoder *Transcoder) (err error) { +func mulaw2WavLpcm(in *AudioFileIn, out *AudioFileOut, transcoder *Transcoder) (err error) { // read all the file if transcoder.Verbose { @@ -74,7 +74,7 @@ func mulaw2Wav(in *AudioFileIn, out *AudioFileOut, transcoder *Transcoder) (err 'W', 'A', 'V', 'E', // Format 'f', 'm', 't', ' ', // Sub-chunk 1 ID 16, 0, 0, 0, // Sub-chunk 1 size - 1, 0, // Audio format (PCM) + audioconfig.WAVE_FORMAT_PCM, 0, // Audio format (PCM) byte(channels), 0, // Number of channels byte(sampleRate & 0xFF), // sample rate (low) byte(sampleRate >> 8 & 0xFF), // sample rate (mid) diff --git a/pkg/transcoder/mulaw2wavlogpcm.go b/pkg/transcoder/mulaw2wavlogpcm.go new file mode 100644 index 0000000..d112ef1 --- /dev/null +++ b/pkg/transcoder/mulaw2wavlogpcm.go @@ -0,0 +1,146 @@ +package transcoder + +import ( + "bufio" + "fmt" + "io" + "log" + "os" + + "github.com/pablodz/sopro/pkg/audioconfig" + "github.com/pablodz/sopro/pkg/cpuarch" + "github.com/pablodz/sopro/pkg/encoding" + "golang.org/x/term" +) + +func init() { + + err := error(nil) + WIDTH_TERMINAL, HEIGHT_TERMINAL, err = term.GetSize(0) + if err != nil { + log.Fatal(err) + } +} + +// TODO: split functions for different sizes of files +// Transcode an ulaw file to a wav file (large files supported) +// https://raw.githubusercontent.com/corkami/pics/master/binary/WAV.png +// http://www.topherlee.com/software/pcm-tut-wavformat.html +func mulaw2WavLogpcm(in *AudioFileIn, out *AudioFileOut, transcoder *Transcoder) (err error) { + + // read all the file + if transcoder.Verbose { + graphIn(in) + } + + // Get the WAV file configuration + channels := out.Config.(audioconfig.WavConfig).Channels + sampleRate := out.Config.(audioconfig.WavConfig).SampleRate + bitsPerSample := out.Config.(audioconfig.WavConfig).BitDepth + transcoder.SourceConfigs.Encoding = in.Config.(audioconfig.MulawConfig).Encoding + transcoder.TargetConfigs.Encoding = out.Config.(audioconfig.WavConfig).Encoding + transcoder.BitDepth = bitsPerSample + + if transcoder.SourceConfigs.Endianness == cpuarch.NOT_FILLED && transcoder.TargetConfigs.Endianness == cpuarch.NOT_FILLED { + transcoder.SourceConfigs.Endianness = cpuarch.LITTLE_ENDIAN // replace with cpuarch.GetEndianess() + transcoder.TargetConfigs.Endianness = cpuarch.LITTLE_ENDIAN + } + + transcoder.Println( + "\n[Format] ", in.Format, "=>", out.Format, + "\n[Encoding] ", encoding.ENCODINGS[in.Config.(audioconfig.MulawConfig).Encoding], "=>", encoding.ENCODINGS[out.Config.(audioconfig.WavConfig).Encoding], + "\n[Channels] ", in.Config.(audioconfig.MulawConfig).Channels, "=>", channels, + "\n[SampleRate] ", in.Config.(audioconfig.MulawConfig).SampleRate, "=>", sampleRate, "kHz", + "\n[BitDepth] ", in.Config.(audioconfig.MulawConfig).BitDepth, "=>", bitsPerSample, "bytes", + "\n[Transcoder][Source][Encoding]", encoding.ENCODINGS[transcoder.SourceConfigs.Encoding], + "\n[Transcoder][Target][Encoding]", encoding.ENCODINGS[transcoder.TargetConfigs.Encoding], + "\n[Transcoder][BitDepth] ", transcoder.BitDepth, + "\n[Transcoder][Endianness] ", cpuarch.ENDIANESSES[cpuarch.GetEndianess()], + ) + + // Create a buffered reader and writer + in.Reader = bufio.NewReader(in.Data) + out.Writer = bufio.NewWriter(out.Data) + out.Length = 0 + + headersWav := []byte{ + 'R', 'I', 'F', 'F', // Chunk ID + 0, 0, 0, 0, // Chunk size + 'W', 'A', 'V', 'E', // Format + 'f', 'm', 't', ' ', // Sub-chunk 1 ID + 16, 0, 0, 0, // Sub-chunk 1 size + audioconfig.WAVE_FORMAT_MULAW, 0, // Audio format (1 = PCM) + byte(channels), 0, // Number of channels + byte(sampleRate & 0xFF), // sample rate (low) + byte(sampleRate >> 8 & 0xFF), // sample rate (mid) + byte(sampleRate >> 16 & 0xFF), // sample rate (high) + byte(sampleRate >> 24 & 0xFF), // sample rate (high) + byte(sampleRate * channels * (bitsPerSample / 8) & 0xFF), // byte rate (low) + byte(sampleRate * channels * (bitsPerSample / 8) >> 8 & 0xFF), // byte rate (mid) + byte(sampleRate * channels * (bitsPerSample / 8) >> 16 & 0xFF), // byte rate (high) + byte(sampleRate * channels * (bitsPerSample / 8) >> 24 & 0xFF), // byte rate (high) + byte(channels * (bitsPerSample / 8)), 0, // block align + byte(bitsPerSample), 0, // bits per sample + 'd', 'a', 't', 'a', + 0, 0, 0, 0, + } + out.Writer.Write(headersWav) + out.Length += len(headersWav) + + if transcoder.Verbose { + audioconfig.PrintWavHeaders(headersWav) + } + + // Copy the data from the input file to the output file in chunks + if err = TranscodeBytes(in, out, transcoder); err != nil { + return fmt.Errorf("error converting bytes: %v", err) + } + + // Flush the output file + if err := out.Writer.Flush(); err != nil { + return fmt.Errorf("error flushing output file: %v", err) + } + transcoder.Println("Wrote", out.Length, "bytes to output file") + + // Update the file size and data size fields + fileFixer := out.Data.(*os.File) + r, err := fileFixer.Seek(4, io.SeekStart) + if err != nil { + return fmt.Errorf("error seeking file: %v", err) + } + transcoder.Println("Seeked to:", r) + fileSize := []byte{ + byte((out.Length - 8) & 0xff), + byte((out.Length - 8) >> 8 & 0xff), + byte((out.Length - 8) >> 16 & 0xff), + byte((out.Length - 8) >> 24 & 0xff), + } + n, err := fileFixer.Write(fileSize) + if err != nil { + return fmt.Errorf("error writing file size: %v", err) + } + transcoder.Println("File size:", fmt.Sprintf("% 02x", fileSize), "bytes written:", n) + dataSize := []byte{ + byte((out.Length - 44) & 0xff), + byte((out.Length - 44) >> 8 & 0xff), + byte((out.Length - 44) >> 16 & 0xff), + byte((out.Length - 44) >> 24 & 0xff), + } + r, err = fileFixer.Seek(40, io.SeekStart) + if err != nil { + return fmt.Errorf("[2]error seeking file: %v", err) + } + transcoder.Println("Seeked to:", r) + n, err = fileFixer.Write(dataSize) + if err != nil { + return fmt.Errorf("error writing data size: %v", err) + } + transcoder.Println("Data size:", fmt.Sprintf("% 02x", dataSize), "bytes written:", n) + + if transcoder.Verbose { + graphOut(in, out) + } + + return nil + +} diff --git a/pkg/transcoder/router.go b/pkg/transcoder/router.go index 02535fb..1ea8ea2 100644 --- a/pkg/transcoder/router.go +++ b/pkg/transcoder/router.go @@ -1,5 +1,31 @@ package transcoder +import ( + "fmt" + + "github.com/pablodz/sopro/pkg/audioconfig" + "github.com/pablodz/sopro/pkg/encoding" +) + +const ErrUnsupportedConversion = "unsupported conversion" + func (t *Transcoder) Mulaw2Wav(in *AudioFileIn, out *AudioFileOut) error { - return mulaw2Wav(in, out, t) + + inSpace := in.Config.(audioconfig.MulawConfig).Encoding + outSpace := out.Config.(audioconfig.WavConfig).Encoding + + switch { + case inSpace == encoding.SPACE_LOGARITHMIC && outSpace == encoding.SPACE_LINEAR: + return mulaw2WavLpcm(in, out, t) + case inSpace == encoding.SPACE_LOGARITHMIC && outSpace == encoding.SPACE_LOGARITHMIC: + return mulaw2WavLogpcm(in, out, t) + default: + return fmt.Errorf( + "%s: %s -> %s", + ErrUnsupportedConversion, + encoding.ENCODINGS[inSpace], + encoding.ENCODINGS[outSpace], + ) + + } } diff --git a/pkg/transcoder/transcoding_general.go b/pkg/transcoder/transcoding_general.go index 169359d..b2c737d 100644 --- a/pkg/transcoder/transcoding_general.go +++ b/pkg/transcoder/transcoding_general.go @@ -42,8 +42,8 @@ func TranscodeBytes(in *AudioFileIn, out *AudioFileOut, transcoder *Transcoder) func equalSpaceEncoding(in *AudioFileIn, out *AudioFileOut, transcoder *Transcoder) (int, error) { sizeBuff := 1024 // max size, more than that would be too much - if transcoder.SizeBufferToProcess > 0 { - sizeBuff = transcoder.SizeBufferToProcess + if transcoder.SizeBuffer > 0 { + sizeBuff = transcoder.SizeBuffer } nTotal := 0 buf := make([]byte, sizeBuff) // read and write in chunks of 1024 byte @@ -69,8 +69,8 @@ func equalSpaceEncoding(in *AudioFileIn, out *AudioFileOut, transcoder *Transcod func differentSpaceEncoding(in *AudioFileIn, out *AudioFileOut, transcoder *Transcoder) (int, error) { sizeBuff := 1024 // max size, more than that would be too much - if transcoder.SizeBufferToProcess > 0 { - sizeBuff = transcoder.SizeBufferToProcess + if transcoder.SizeBuffer > 0 { + sizeBuff = transcoder.SizeBuffer } nTotal := 0 buf := make([]byte, sizeBuff) // input buffer From 9c3906a7d827c32dac6b4d2b7d64a591ba54ded9 Mon Sep 17 00:00:00 2001 From: Pablo Diaz Date: Mon, 9 Jan 2023 17:58:55 -0400 Subject: [PATCH 2/3] docs --- README.md | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 7f77b8a..7d391a5 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,26 @@ Sox is a great tool, but it's not easy to use. SoPro is a next generation sound processing tool that is easy to use and easy to extend. By now only audio files can be converted to other formats, but in the future more features will be added, like video processing, etc. -Functionalities: +``` + ┌─────────────────┐ + raw data ───────►│ ├────────► returns raw data in other format + │ │ + │ │ + websocket ───────►│ ├────────► returns raw data in other formats + │ │ + │ SOPRO-CORE │ + chunked data───────►│ ├────────► returns chunked processed data + │ │ + │ │ + gRPC ───────►│ ├────────► returns grpc chunked data + │ │ + └─────────────────┘ + +Examples: + +- ulaw -> wav pcm +- ulaw -> wav pcm normalized (on the fly) +``` ## Installation From c90b8ed718b68a25c498d625508f6d77b234f4ab Mon Sep 17 00:00:00 2001 From: Pablo Diaz Date: Mon, 9 Jan 2023 18:00:12 -0400 Subject: [PATCH 3/3] docs --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 7d391a5..ca18c6c 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,9 @@ Examples: - ulaw -> wav pcm normalized (on the fly) ``` +Plugins: +- Connectivity to python neural network inference api with grpc/http and caching the inference + ## Installation ```bash