-
Notifications
You must be signed in to change notification settings - Fork 4.2k
refactor: Move upload/delete transcript into video_config service #37657
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,16 +8,33 @@ | |
|
|
||
| import logging | ||
|
|
||
| from opaque_keys.edx.keys import CourseKey, UsageKey | ||
|
|
||
| from openedx.core.djangoapps.video_config import sharing | ||
| from django.core.files import File | ||
| from django.core.files.base import ContentFile | ||
| from edxval.api import create_external_video, create_or_update_video_transcript, delete_video_transcript | ||
| from opaque_keys.edx.keys import ( | ||
| CourseKey, | ||
| UsageKey, | ||
| ) | ||
| from opaque_keys.edx.locator import LibraryLocatorV2 | ||
| from xblocks_contrib.video import VideoBlock | ||
| from organizations.api import get_course_organization | ||
| from openedx.core.djangoapps.video_config import sharing | ||
| from openedx.core.djangoapps.video_config.models import ( | ||
| CourseYoutubeBlockedFlag, | ||
| HLSPlaybackEnabledFlag, | ||
| ) | ||
| from openedx.core.djangoapps.video_config.toggles import TRANSCRIPT_FEEDBACK | ||
| from openedx.core.djangoapps.video_pipeline.config.waffle import DEPRECATE_YOUTUBE | ||
| from openedx.core.djangoapps.content_libraries.api import ( | ||
| add_library_block_static_asset_file, | ||
| delete_library_block_static_asset_file, | ||
| ) | ||
| from openedx.core.djangoapps.video_config.transcripts_utils import ( | ||
| Transcript, | ||
| clean_video_id, | ||
| get_html5_ids, | ||
| remove_subs_from_store, | ||
| ) | ||
|
|
||
| log = logging.getLogger(__name__) | ||
|
|
||
|
|
@@ -29,6 +46,9 @@ class VideoConfigService: | |
| This service abstracts away edx-platform specific functionality | ||
| that the Video XBlock needs, allowing the Video XBlock to be | ||
| extracted to a separate repository. | ||
|
|
||
| TODO: This service could be improved in a few ways: | ||
| https://github.com/openedx/edx-platform/issues/37656 | ||
| """ | ||
|
|
||
| def get_public_video_url(self, usage_id: UsageKey) -> str: | ||
|
|
@@ -37,7 +57,7 @@ def get_public_video_url(self, usage_id: UsageKey) -> str: | |
| """ | ||
| return sharing.get_public_video_url(usage_id) | ||
|
|
||
| def get_public_sharing_context(self, video_block, course_key: CourseKey) -> dict: | ||
| def get_public_sharing_context(self, video_block: VideoBlock, course_key: CourseKey) -> dict: | ||
| """ | ||
| Get the complete public sharing context for a video. | ||
|
|
||
|
|
@@ -91,3 +111,117 @@ def is_hls_playback_enabled(self, course_id: CourseKey) -> bool: | |
| Check if HLS playback is enabled for the course. | ||
| """ | ||
| return HLSPlaybackEnabledFlag.feature_enabled(course_id) | ||
|
|
||
| def upload_transcript( | ||
| self, | ||
| *, | ||
| video_block: VideoBlock, | ||
| language_code: str, | ||
| new_language_code: str | None, | ||
| transcript_file: File, | ||
| edx_video_id: str | None, | ||
| ) -> None: | ||
| """ | ||
| Store a transcript, however the runtime prefers to. | ||
|
|
||
| Mutates: | ||
| * video_block.transcripts | ||
| * video_block.edx_video_id, iff a new video is created in edx-val. | ||
|
|
||
| Can raise: | ||
| * UnicodeDecodeError | ||
| * TranscriptsGenerationException | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we throw platform specific exceptions, for example Even if we throw them we can't catch the specific exceptions within the external XBlock. So if we can only catch the general exception in the external XBlock. Shouldn't we only throw general exceptions? |
||
| """ | ||
| is_library = isinstance(video_block.usage_key.context_key, LibraryLocatorV2) | ||
| content: bytes = transcript_file.read() | ||
| if is_library: | ||
| # Save transcript as static asset in Learning Core if is a library component | ||
| filename = f'transcript-{new_language_code}.srt' | ||
| add_library_block_static_asset_file(video_block.usage_key, f"static/{filename}", content) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| else: | ||
| edx_video_id = clean_video_id(edx_video_id) | ||
| if not edx_video_id: | ||
| # Back-populate the video ID for an external video. | ||
| # pylint: disable=attribute-defined-outside-init | ||
| edx_video_id = create_external_video(display_name='external video') | ||
| video_block.edx_video_id = edx_video_id | ||
| filename = f'{edx_video_id}-{new_language_code}.srt' | ||
| # Convert SRT transcript into an SJSON format and upload it to S3 if a course component | ||
| sjson_subs = Transcript.convert( | ||
| content=content.decode('utf-8'), | ||
| input_format=Transcript.SRT, | ||
| output_format=Transcript.SJSON | ||
| ).encode() | ||
| create_or_update_video_transcript( | ||
| video_id=edx_video_id, | ||
| language_code=language_code, | ||
| metadata={ | ||
| 'file_format': Transcript.SJSON, | ||
| 'language_code': new_language_code, | ||
| }, | ||
| file_data=ContentFile(sjson_subs), | ||
| ) | ||
|
|
||
| # If a new transcript is added, then both new_language_code and | ||
| # language_code fields will have the same value. | ||
| if language_code != new_language_code: | ||
| video_block.transcripts.pop(language_code, None) | ||
| video_block.transcripts[new_language_code] = filename | ||
|
|
||
| if is_library: | ||
| _save_transcript_field(video_block) | ||
|
|
||
| def delete_transcript( | ||
| self, | ||
| *, | ||
| video_block: VideoBlock, | ||
| edx_video_id: str | None, | ||
| language_code: str, | ||
| ) -> None: | ||
| """ | ||
| Delete a transcript from the runtime's storage. | ||
| """ | ||
| edx_video_id = clean_video_id(edx_video_id) | ||
| if edx_video_id: | ||
| delete_video_transcript(video_id=edx_video_id, language_code=language_code) | ||
| if isinstance(video_block.context_key, LibraryLocatorV2): | ||
| transcript_name = video_block.transcripts.pop(language_code, None) | ||
| if transcript_name: | ||
| delete_library_block_static_asset_file(video_block.usage_key, f"static/{transcript_name}") | ||
| _save_transcript_field(video_block) | ||
| else: | ||
| if language_code == 'en': | ||
| # remove any transcript file from content store for the video ids | ||
| possible_sub_ids = [ | ||
| video_block.sub, # pylint: disable=access-member-before-definition | ||
| video_block.youtube_id_1_0 | ||
| ] + get_html5_ids(video_block.html5_sources) | ||
| for sub_id in possible_sub_ids: | ||
| remove_subs_from_store(sub_id, video_block, language_code) | ||
| # update metadata as `en` can also be present in `transcripts` field | ||
| remove_subs_from_store( | ||
| video_block.transcripts.pop(language_code, None), video_block, language_code | ||
| ) | ||
| # also empty `sub` field | ||
| video_block.sub = '' # pylint: disable=attribute-defined-outside-init | ||
| else: | ||
| remove_subs_from_store( | ||
| video_block.transcripts.pop(language_code, None), video_block, language_code | ||
| ) | ||
|
|
||
|
|
||
| def _save_transcript_field(video_block: VideoBlock): | ||
| """ | ||
| Hacky workaround to ensure that transcript field is saved for Learning Core video blocks. | ||
|
|
||
| It's not clear why this is necessary. | ||
| """ | ||
| field = video_block.fields['transcripts'] | ||
| if video_block.transcripts: | ||
| transcripts_copy = video_block.transcripts.copy() | ||
| # Need to delete to overwrite, it's weird behavior, | ||
| # but it only works like this. | ||
| field.delete_from(video_block) | ||
| field.write_to(video_block, transcripts_copy) | ||
| else: | ||
| field.delete_from(video_block) | ||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't we import it from the xmodule.
I am also having import VideoBlock in this PR, will be great if we could skip its annotation until we are done with extraction work.