Skip to content

Commit

Permalink
Dubbing: Fix bug of changing window. v5.15.20
Browse files Browse the repository at this point in the history
  • Loading branch information
winlinvip committed Jul 22, 2024
1 parent d41d180 commit a21cac0
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 21 deletions.
14 changes: 12 additions & 2 deletions DEVELOPER.md
Original file line number Diff line number Diff line change
Expand Up @@ -937,6 +937,16 @@ youtube-dl --proxy socks5://127.0.0.1:10000 --output srs 'https://youtu.be/Sqraz
> Note: Set `--output TEMPLATE` when you want to define the filename.
## Regenerate ASR for Dubbing

Create a `regenerate.txt` file in the project directory, then restart Oryx and refresh the page:

```bash
touch ./platform/containers/data/dubbing/4830675a-7945-48fe-bed9-72e6fa904a19/regenerate.txt
```

Oryx will regenerate the ASR and translation, and delete `regenerate.txt` so that the regeneration runs only once.

## WebRTC Candidate

Oryx follows the rules for WebRTC candidate, see [CANDIDATE](https://ossrs.io/lts/en-us/docs/v5/doc/webrtc#config-candidate),
Expand Down Expand Up @@ -1272,8 +1282,8 @@ The following are the update records for the Oryx server.
* Dubbing: Merge more words if in small duration. v5.15.17
* Dubbing: Allow fullscreen when ASR. v5.15.18
* Dubbing: Support disable asr or translation. v5.15.19
* Dubbing: Fix bug when changing ASR segment size. [v5.15.20](https://github.com/ossrs/oryx/releases/tag/v5.15.20)
* Dubbing: Refine the window of text. v5.15.21
* Dubbing: Fix bug when changing ASR segment size. v5.15.20
* Dubbing: Refine the window of text. [v5.15.20](https://github.com/ossrs/oryx/releases/tag/v5.15.20)
* v5.14:
* Merge features and bugfix from releases. v5.14.1
* Dubbing: Support VoD dubbing for multiple languages. [v5.14.2](https://github.com/ossrs/oryx/releases/tag/v5.14.2)
Expand Down
38 changes: 19 additions & 19 deletions platform/dubbing.go
Original file line number Diff line number Diff line change
Expand Up @@ -1550,27 +1550,18 @@ func (v *AudioResponse) QueryGroup(uuid string) *AudioGroup {
return nil
}

func (v *AudioResponse) FindAnySegmentMatchStarttime(starttime float64) *AudioSegment {
for i, g := range v.Groups {
var nextGroup *AudioGroup
if i < len(v.Groups)-1 {
nextGroup = v.Groups[i+1]
}

firstSegment, lastSegment := g.FirstSegment(), g.LastSegment()
if nextGroup != nil {
lastSegment = nextGroup.LastSegment()
}

if firstSegment == nil || lastSegment == nil {
continue
func (v *AudioResponse) MatchGroups(starttime float64, max int) []*AudioGroup {
var matched []*AudioGroup
for _, g := range v.Groups {
if first := g.FirstSegment(); first != nil && starttime <= first.Start {
matched = append(matched, g)
}

if firstSegment.OriginalStart <= starttime && starttime <= lastSegment.OriginalStart {
return firstSegment
if len(matched) >= max {
break
}
}
return nil
return matched
}

func (v *AudioResponse) AppendSegment(resp openai.AudioResponse, starttime float64) {
Expand Down Expand Up @@ -1798,7 +1789,14 @@ func (v *SrsDubbingTask) Start(ctx context.Context) error {

// Whether force to generate ASR response.
if alwaysForceRegenerateASRResponse {
v.AsrResponse = NewAudioResponse()
v.AsrResponse = nil
}

// Whether exists command file to regenerate ASR.
regenerateASR := path.Join(conf.Pwd, aiDubbingWorkDir, v.project.UUID, "regenerate.txt")
if _, err := os.Stat(regenerateASR); err == nil {
os.Remove(regenerateASR)
v.AsrResponse = nil
}

if v.AsrResponse == nil {
Expand All @@ -1815,7 +1813,9 @@ func (v *SrsDubbingTask) Start(ctx context.Context) error {
break
}

if v.AsrResponse.FindAnySegmentMatchStarttime(starttime) != nil {
// If there are some matched groups about start time, do not generate the ASR for it.
ignoreIfMatched := 3
if matches := v.AsrResponse.MatchGroups(starttime, ignoreIfMatched*2); len(matches) > ignoreIfMatched {
continue
}

Expand Down

0 comments on commit a21cac0

Please sign in to comment.