diff --git a/DEVELOPER.md b/DEVELOPER.md index af8bb439..a75c4cab 100644 --- a/DEVELOPER.md +++ b/DEVELOPER.md @@ -937,6 +937,16 @@ youtube-dl --proxy socks5://127.0.0.1:10000 --output srs 'https://youtu.be/Sqraz > Note: Setup the `--output TEMPLATE` when wants to define the filename. +## Regenerate ASR for Dubbing + +Create a `regenerate.txt` in the project directory, then restart Oryx and refresh the page: + +```bash +touch ./platform/containers/data/dubbing/4830675a-7945-48fe-bed9-72e6fa904a19/regenerate.txt +``` + +Oryx will regenerate the ASR and translation, then delete `regenerate.txt` so that the regeneration runs only once. + ## WebRTC Candidate Oryx follows the rules for WebRTC candidate, see [CANDIDATE](https://ossrs.io/lts/en-us/docs/v5/doc/webrtc#config-candidate), @@ -1272,8 +1282,8 @@ The following are the update records for the Oryx server. * Dubbing: Merge more words if in small duration. v5.15.17 * Dubbing: Allow fullscreen when ASR. v5.15.18 * Dubbing: Support disable asr or translation. v5.15.19 - * Dubbing: Fix bug when changing ASR segment size. [v5.15.20](https://github.com/ossrs/oryx/releases/tag/v5.15.20) - * Dubbing: Refine the window of text. v5.15.21 + * Dubbing: Fix bug when changing ASR segment size. v5.15.20 + * Dubbing: Refine the window of text. [v5.15.20](https://github.com/ossrs/oryx/releases/tag/v5.15.20) * v5.14: * Merge features and bugfix from releases. v5.14.1 * Dubbing: Support VoD dubbing for multiple languages. 
[v5.14.2](https://github.com/ossrs/oryx/releases/tag/v5.14.2) diff --git a/platform/dubbing.go b/platform/dubbing.go index dcdbdd30..e452e769 100644 --- a/platform/dubbing.go +++ b/platform/dubbing.go @@ -1550,27 +1550,18 @@ func (v *AudioResponse) QueryGroup(uuid string) *AudioGroup { return nil } -func (v *AudioResponse) FindAnySegmentMatchStarttime(starttime float64) *AudioSegment { - for i, g := range v.Groups { - var nextGroup *AudioGroup - if i < len(v.Groups)-1 { - nextGroup = v.Groups[i+1] - } - - firstSegment, lastSegment := g.FirstSegment(), g.LastSegment() - if nextGroup != nil { - lastSegment = nextGroup.LastSegment() - } - - if firstSegment == nil || lastSegment == nil { - continue +func (v *AudioResponse) MatchGroups(starttime float64, max int) []*AudioGroup { + var matched []*AudioGroup + for _, g := range v.Groups { + if first := g.FirstSegment(); first != nil && starttime <= first.Start { + matched = append(matched, g) } - if firstSegment.OriginalStart <= starttime && starttime <= lastSegment.OriginalStart { - return firstSegment + if len(matched) >= max { + break } } - return nil + return matched } func (v *AudioResponse) AppendSegment(resp openai.AudioResponse, starttime float64) { @@ -1798,7 +1789,14 @@ func (v *SrsDubbingTask) Start(ctx context.Context) error { // Whether force to generate ASR response. if alwaysForceRegenerateASRResponse { - v.AsrResponse = NewAudioResponse() + v.AsrResponse = nil + } + + // Whether exists command file to regenerate ASR. + regenerateASR := path.Join(conf.Pwd, aiDubbingWorkDir, v.project.UUID, "regenerate.txt") + if _, err := os.Stat(regenerateASR); err == nil { + os.Remove(regenerateASR) + v.AsrResponse = nil } if v.AsrResponse == nil { @@ -1815,7 +1813,9 @@ func (v *SrsDubbingTask) Start(ctx context.Context) error { break } - if v.AsrResponse.FindAnySegmentMatchStarttime(starttime) != nil { + // If there are some matched groups about start time, do not generate the ASR for it. 
+ ignoreIfMatched := 3 + if matches := v.AsrResponse.MatchGroups(starttime, ignoreIfMatched*2); len(matches) > ignoreIfMatched { continue }