Skip to content

Commit

Permalink
Merge pull request #2 from pablodz/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
pablodz authored Jan 9, 2023
2 parents 29ebb13 + c90b8ed commit 7d4b31b
Show file tree
Hide file tree
Showing 11 changed files with 308 additions and 12 deletions.
40 changes: 40 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,55 @@

SoX is a great tool, but it's not easy to use. SoPro is a next-generation sound processing tool that is easy to use and easy to extend. For now, only audio files can be converted to other formats; more features, such as video processing, will be added in the future.

```
┌─────────────────┐
raw data ───────►│ ├────────► returns raw data in other format
│ │
│ │
websocket ───────►│ ├────────► returns raw data in other formats
│ │
│ SOPRO-CORE │
chunked data───────►│ ├────────► returns chunked processed data
│ │
│ │
gRPC ───────►│ ├────────► returns grpc chunked data
│ │
└─────────────────┘
Examples:
- ulaw -> wav pcm
- ulaw -> wav pcm normalized (on the fly)
```

Plugins:
- Connectivity to a Python neural-network inference API over gRPC/HTTP, with caching of inference results

## Installation

```bash
go get -v github.com/pablodz/sopro
```

## Methods planned to be implemented

- [x] Chunked
- [x] Full memory
- [ ] Batch
- [ ] Streaming

## Examples

Check [./examples](./examples/) folder

## Roadmap

- [ ] CLI (sox-friendly)
- [ ] GUI (in another repo)
- [ ] Microservice (in another repo)
- [ ] HTTP
- [ ] Websocket
- [ ] gRPC
- [x] Audio file conversion
- [ ] Format conversion [Work in progress...](docs/format_table.md)
- [ ] Bitrate conversion
Expand Down
2 changes: 1 addition & 1 deletion docs/format_table.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ blockquote {
| mogg | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | + | | | | | | | | | | | | | | | | |
| ulaw | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | + | | | | | | | | | | | + | | | | |
| opus | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | + | | | | | | | | | | | | | | |
| pcm | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | + | | | | | | | | | | | | | |
| pcm | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | + | | | | | | | | | + | | | | |
| ra | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | + | | | | | | | | | | | | |
| rm | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | + | | | | | | | | | | | |
| raw | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | _ | + | | | | | | | | | | |
Expand Down
75 changes: 75 additions & 0 deletions examples/ulaw2wav_logpcm/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package main

import (
"os"

"github.com/pablodz/sopro/pkg/audioconfig"
"github.com/pablodz/sopro/pkg/cpuarch"
"github.com/pablodz/sopro/pkg/encoding"
"github.com/pablodz/sopro/pkg/fileformat"
"github.com/pablodz/sopro/pkg/method"
"github.com/pablodz/sopro/pkg/transcoder"
)

// main wraps the sample mu-law recording in a WAV container while keeping
// the payload in logarithmic (mu-law) space. Any failure aborts with a panic.
func main() {
	const (
		srcPath = "./internal/samples/recording.ulaw"
		dstPath = "./internal/samples/result_sample_ulaw_mono_8000_le_logpcm.wav"
	)

	// Source: raw mu-law samples.
	src, err := os.Open(srcPath)
	if err != nil {
		panic(err)
	}
	defer src.Close()

	// Destination: the WAV file to be produced.
	dst, err := os.Create(dstPath)
	if err != nil {
		panic(err)
	}
	defer dst.Close()

	// Both sides of the conversion are little-endian.
	littleEndian := transcoder.TranscoderAudioConfig{
		Endianness: cpuarch.LITTLE_ENDIAN,
	}
	t := &transcoder.Transcoder{
		Method:        method.BIT_TABLE,
		SourceConfigs: littleEndian,
		TargetConfigs: littleEndian,
		SizeBuffer:    1024,
		Verbose:       true,
	}

	// Describe the input: 8-bit mono mu-law at 8000 Hz.
	input := &transcoder.AudioFileIn{
		Data: src,
		AudioFileGeneral: transcoder.AudioFileGeneral{
			Format: fileformat.AUDIO_MULAW,
			Config: audioconfig.MulawConfig{
				BitDepth:   8,
				Channels:   1,
				Encoding:   encoding.SPACE_LOGARITHMIC, // ulaw is logarithmic
				SampleRate: 8000,
			},
		},
	}

	// Describe the output: same layout, still logarithmic, inside a WAV file.
	output := &transcoder.AudioFileOut{
		Data: dst,
		AudioFileGeneral: transcoder.AudioFileGeneral{
			Format: fileformat.AUDIO_WAV,
			Config: audioconfig.WavConfig{
				BitDepth:   8,
				Channels:   1,
				Encoding:   encoding.SPACE_LOGARITHMIC,
				SampleRate: 8000,
			},
		},
	}

	// Run the conversion.
	if err := t.Mulaw2Wav(input, output); err != nil {
		panic(err)
	}
}
6 changes: 3 additions & 3 deletions examples/ulaw2wav/main.go → examples/ulaw2wav_lpcm/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ func main() {
defer in.Close()

// Create the output file
out, err := os.Create("./internal/samples/result_sample_ulaw_mono_8000_be.wav")
out, err := os.Create("./internal/samples/result_sample_ulaw_mono_8000_le_lpcm.wav")
if err != nil {
panic(err)
}
Expand All @@ -36,8 +36,8 @@ func main() {
TargetConfigs: transcoder.TranscoderAudioConfig{
Endianness: cpuarch.LITTLE_ENDIAN,
},
SizeBufferToProcess: 1024,
Verbose: true,
SizeBuffer: 1024,
Verbose: true,
}

// Transcode the file
Expand Down
8 changes: 8 additions & 0 deletions pkg/audioconfig/wav.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,14 @@ type WavConfig struct {
Encoding int // the encoding format (e.g. "PCM", "IEEE_FLOAT")
}

// WAVE format codes written into the audio-format field of the "fmt " chunk
// of a WAV header. Values follow the WAVE specification summary at:
// https://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
const (
// Linear PCM samples.
WAVE_FORMAT_PCM = 0x0001
// IEEE floating-point samples.
WAVE_FORMAT_IEEE_FLOAT = 0x0003
// 8-bit ITU-T G.711 A-law samples.
WAVE_FORMAT_ALAW = 0x0006
// 8-bit ITU-T G.711 mu-law samples.
WAVE_FORMAT_MULAW = 0x0007
)

func PrintWavHeaders(headersWav []byte) {
if len(headersWav) != 44 {
log.Println("[ERROR] Headers are not 44 bytes long")
Expand Down
1 change: 1 addition & 0 deletions pkg/encoding/encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ const (
)

// ENCODINGS maps each encoding-space constant of this package to a
// human-readable name, suitable for log output and error messages.
var ENCODINGS = map[int]string{
NOT_FILLED: "Not filled",
SPACE_LINEAR: "Linear",
SPACE_LOGARITHMIC: "Logarithmic",
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/transcoder/models_transcoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package transcoder
type Transcoder struct {
Method int // the method of transcoding (e.g. 1, 2, 3, etc.)
MethodAdvancedConfigs interface{} // the specific configuration options for the transcoding method
SizeBufferToProcess int // the size of the buffer to read from the input file. Default is 1024
SizeBuffer int // the size of the buffer to read from the input file. Default is 1024
SourceConfigs TranscoderAudioConfig // the source configuration
TargetConfigs TranscoderAudioConfig // the target configuration
BitDepth int // the bit depth (e.g. 8, 16, 24) Needs to be equal for source and target
Expand Down
4 changes: 2 additions & 2 deletions pkg/transcoder/mulaw2wav.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func init() {
// Transcode an ulaw file to a wav file (large files supported)
// https://raw.githubusercontent.com/corkami/pics/master/binary/WAV.png
// http://www.topherlee.com/software/pcm-tut-wavformat.html
func mulaw2Wav(in *AudioFileIn, out *AudioFileOut, transcoder *Transcoder) (err error) {
func mulaw2WavLpcm(in *AudioFileIn, out *AudioFileOut, transcoder *Transcoder) (err error) {

// read all the file
if transcoder.Verbose {
Expand Down Expand Up @@ -74,7 +74,7 @@ func mulaw2Wav(in *AudioFileIn, out *AudioFileOut, transcoder *Transcoder) (err
'W', 'A', 'V', 'E', // Format
'f', 'm', 't', ' ', // Sub-chunk 1 ID
16, 0, 0, 0, // Sub-chunk 1 size
1, 0, // Audio format (PCM)
audioconfig.WAVE_FORMAT_PCM, 0, // Audio format (PCM)
byte(channels), 0, // Number of channels
byte(sampleRate & 0xFF), // sample rate (low)
byte(sampleRate >> 8 & 0xFF), // sample rate (mid)
Expand Down
146 changes: 146 additions & 0 deletions pkg/transcoder/mulaw2wavlogpcm.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
package transcoder

import (
"bufio"
"fmt"
"io"
"log"
"os"

"github.com/pablodz/sopro/pkg/audioconfig"
"github.com/pablodz/sopro/pkg/cpuarch"
"github.com/pablodz/sopro/pkg/encoding"
"golang.org/x/term"
)

// init records the terminal dimensions in WIDTH_TERMINAL / HEIGHT_TERMINAL
// (presumably consumed by the verbose graph helpers; confirm against
// graphIn/graphOut).
//
// term.GetSize fails whenever file descriptor 0 is not a terminal (piped
// input, CI, services). Aborting the whole process from a library init in
// that situation would make the package unusable there, so the failure is
// logged and a conventional 80x24 size is used for any dimension that has
// not already been set to a positive value.
func init() {
	const (
		fallbackWidth  = 80
		fallbackHeight = 24
	)

	width, height, err := term.GetSize(0)
	if err == nil {
		WIDTH_TERMINAL, HEIGHT_TERMINAL = width, height
		return
	}

	log.Printf("[WARN] terminal size unavailable (%v), falling back to %dx%d",
		err, fallbackWidth, fallbackHeight)
	if WIDTH_TERMINAL <= 0 {
		WIDTH_TERMINAL = fallbackWidth
	}
	if HEIGHT_TERMINAL <= 0 {
		HEIGHT_TERMINAL = fallbackHeight
	}
}

// TODO: split functions for different sizes of files
//
// mulaw2WavLogpcm wraps a mu-law stream in a WAV container whose format code
// is WAVE_FORMAT_MULAW, i.e. the samples stay in logarithmic space. The
// payload is streamed through TranscodeBytes in chunks, so large files are
// supported.
//
// Steps:
//  1. write a 44-byte header with zeroed size fields,
//  2. stream the payload via TranscodeBytes and flush the buffered writer,
//  3. seek back and patch the RIFF chunk size (offset 4) and the data chunk
//     size (offset 40) using out.Length.
//
// in.Config must be an audioconfig.MulawConfig, out.Config an
// audioconfig.WavConfig, and out.Data an *os.File (needed for the seek in
// step 3); otherwise an error is returned.
//
// NOTE(review): the WAVE reference linked below describes an extended "fmt "
// chunk and a "fact" chunk for non-PCM formats; this header uses the plain
// 16-byte PCM-style layout. Confirm that target players accept it.
//
// https://raw.githubusercontent.com/corkami/pics/master/binary/WAV.png
// http://www.topherlee.com/software/pcm-tut-wavformat.html
// https://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
func mulaw2WavLogpcm(in *AudioFileIn, out *AudioFileOut, transcoder *Transcoder) (err error) {
	const (
		wavHeaderSize  = 44 // bytes preceding the sample data
		riffPrefixSize = 8  // "RIFF" + chunk-size field, excluded from the chunk size
		riffSizeOffset = 4  // file offset of the RIFF chunk-size field
		dataSizeOffset = 40 // file offset of the data chunk-size field
	)

	// le32 encodes the low 32 bits of v as four little-endian bytes.
	le32 := func(v int) []byte {
		return []byte{
			byte(v & 0xff),
			byte(v >> 8 & 0xff),
			byte(v >> 16 & 0xff),
			byte(v >> 24 & 0xff),
		}
	}

	if transcoder.Verbose {
		graphIn(in)
	}

	// Validate the configuration types up front instead of panicking on a
	// failed type assertion.
	srcCfg, ok := in.Config.(audioconfig.MulawConfig)
	if !ok {
		return fmt.Errorf("input config is %T, want audioconfig.MulawConfig", in.Config)
	}
	dstCfg, ok := out.Config.(audioconfig.WavConfig)
	if !ok {
		return fmt.Errorf("output config is %T, want audioconfig.WavConfig", out.Config)
	}

	// Get the WAV file configuration
	channels := dstCfg.Channels
	sampleRate := dstCfg.SampleRate
	bitsPerSample := dstCfg.BitDepth
	transcoder.SourceConfigs.Encoding = srcCfg.Encoding
	transcoder.TargetConfigs.Encoding = dstCfg.Encoding
	transcoder.BitDepth = bitsPerSample

	// NOTE(review): the default is applied only when BOTH sides are unset;
	// a single unset side is left untouched. Confirm this is intended.
	if transcoder.SourceConfigs.Endianness == cpuarch.NOT_FILLED && transcoder.TargetConfigs.Endianness == cpuarch.NOT_FILLED {
		transcoder.SourceConfigs.Endianness = cpuarch.LITTLE_ENDIAN // replace with cpuarch.GetEndianess()
		transcoder.TargetConfigs.Endianness = cpuarch.LITTLE_ENDIAN
	}

	transcoder.Println(
		"\n[Format]                      ", in.Format, "=>", out.Format,
		"\n[Encoding]                    ", encoding.ENCODINGS[srcCfg.Encoding], "=>", encoding.ENCODINGS[dstCfg.Encoding],
		"\n[Channels]                    ", srcCfg.Channels, "=>", channels,
		"\n[SampleRate]                  ", srcCfg.SampleRate, "=>", sampleRate, "Hz",
		"\n[BitDepth]                    ", srcCfg.BitDepth, "=>", bitsPerSample, "bits",
		"\n[Transcoder][Source][Encoding]", encoding.ENCODINGS[transcoder.SourceConfigs.Encoding],
		"\n[Transcoder][Target][Encoding]", encoding.ENCODINGS[transcoder.TargetConfigs.Encoding],
		"\n[Transcoder][BitDepth]        ", transcoder.BitDepth,
		"\n[Transcoder][Endianness]      ", cpuarch.ENDIANESSES[cpuarch.GetEndianess()],
	)

	// Create a buffered reader and writer
	in.Reader = bufio.NewReader(in.Data)
	out.Writer = bufio.NewWriter(out.Data)
	out.Length = 0

	bytesPerSample := bitsPerSample / 8
	byteRate := sampleRate * channels * bytesPerSample
	blockAlign := channels * bytesPerSample

	headersWav := make([]byte, 0, wavHeaderSize)
	headersWav = append(headersWav, 'R', 'I', 'F', 'F')                  // Chunk ID
	headersWav = append(headersWav, 0, 0, 0, 0)                          // Chunk size (patched below)
	headersWav = append(headersWav, 'W', 'A', 'V', 'E')                  // Format
	headersWav = append(headersWav, 'f', 'm', 't', ' ')                  // Sub-chunk 1 ID
	headersWav = append(headersWav, 16, 0, 0, 0)                         // Sub-chunk 1 size
	headersWav = append(headersWav, audioconfig.WAVE_FORMAT_MULAW, 0)    // Audio format (7 = mu-law)
	headersWav = append(headersWav, byte(channels), 0)                   // Number of channels
	headersWav = append(headersWav, le32(sampleRate)...)                 // Sample rate
	headersWav = append(headersWav, le32(byteRate)...)                   // Byte rate
	headersWav = append(headersWav, byte(blockAlign), 0)                 // Block align
	headersWav = append(headersWav, byte(bitsPerSample), 0)              // Bits per sample
	headersWav = append(headersWav, 'd', 'a', 't', 'a')                  // Sub-chunk 2 ID
	headersWav = append(headersWav, 0, 0, 0, 0)                          // Sub-chunk 2 size (patched below)

	if _, err = out.Writer.Write(headersWav); err != nil {
		return fmt.Errorf("error writing wav header: %w", err)
	}
	out.Length += len(headersWav)

	if transcoder.Verbose {
		audioconfig.PrintWavHeaders(headersWav)
	}

	// Copy the data from the input file to the output file in chunks
	if err = TranscodeBytes(in, out, transcoder); err != nil {
		return fmt.Errorf("error converting bytes: %w", err)
	}

	// Flush the output file
	if err = out.Writer.Flush(); err != nil {
		return fmt.Errorf("error flushing output file: %w", err)
	}
	transcoder.Println("Wrote", out.Length, "bytes to output file")

	// Update the file size and data size fields; this needs a seekable file.
	fileFixer, ok := out.Data.(*os.File)
	if !ok {
		return fmt.Errorf("output data is %T, want *os.File to patch the wav size fields", out.Data)
	}

	r, err := fileFixer.Seek(riffSizeOffset, io.SeekStart)
	if err != nil {
		return fmt.Errorf("error seeking file: %w", err)
	}
	transcoder.Println("Seeked to:", r)
	fileSize := le32(out.Length - riffPrefixSize)
	n, err := fileFixer.Write(fileSize)
	if err != nil {
		return fmt.Errorf("error writing file size: %w", err)
	}
	transcoder.Println("File size:", fmt.Sprintf("% 02x", fileSize), "bytes written:", n)

	dataSize := le32(out.Length - wavHeaderSize)
	r, err = fileFixer.Seek(dataSizeOffset, io.SeekStart)
	if err != nil {
		return fmt.Errorf("[2]error seeking file: %w", err)
	}
	transcoder.Println("Seeked to:", r)
	n, err = fileFixer.Write(dataSize)
	if err != nil {
		return fmt.Errorf("error writing data size: %w", err)
	}
	transcoder.Println("Data size:", fmt.Sprintf("% 02x", dataSize), "bytes written:", n)

	if transcoder.Verbose {
		graphOut(in, out)
	}

	return nil
}
28 changes: 27 additions & 1 deletion pkg/transcoder/router.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,31 @@
package transcoder

import (
"fmt"

"github.com/pablodz/sopro/pkg/audioconfig"
"github.com/pablodz/sopro/pkg/encoding"
)

const ErrUnsupportedConversion = "unsupported conversion"

// Mulaw2Wav converts a mu-law input into a WAV output, selecting the
// implementation from the encoding space declared in the two configs:
//
//   - logarithmic -> linear:      mulaw2WavLpcm
//   - logarithmic -> logarithmic: mulaw2WavLogpcm
//
// in.Config must be an audioconfig.MulawConfig and out.Config an
// audioconfig.WavConfig. A config of another type, or any other encoding
// combination, yields an error whose message starts with
// ErrUnsupportedConversion.
func (t *Transcoder) Mulaw2Wav(in *AudioFileIn, out *AudioFileOut) error {
	inCfg, ok := in.Config.(audioconfig.MulawConfig)
	if !ok {
		return fmt.Errorf(
			"%s: input config is %T, want audioconfig.MulawConfig",
			ErrUnsupportedConversion,
			in.Config,
		)
	}
	outCfg, ok := out.Config.(audioconfig.WavConfig)
	if !ok {
		return fmt.Errorf(
			"%s: output config is %T, want audioconfig.WavConfig",
			ErrUnsupportedConversion,
			out.Config,
		)
	}

	inSpace := inCfg.Encoding
	outSpace := outCfg.Encoding

	switch {
	case inSpace == encoding.SPACE_LOGARITHMIC && outSpace == encoding.SPACE_LINEAR:
		return mulaw2WavLpcm(in, out, t)
	case inSpace == encoding.SPACE_LOGARITHMIC && outSpace == encoding.SPACE_LOGARITHMIC:
		return mulaw2WavLogpcm(in, out, t)
	default:
		return fmt.Errorf(
			"%s: %s -> %s",
			ErrUnsupportedConversion,
			encoding.ENCODINGS[inSpace],
			encoding.ENCODINGS[outSpace],
		)
	}
}
Loading

0 comments on commit 7d4b31b

Please sign in to comment.