From c401c30d24c5d12d97efe4202389ad3dd15b7211 Mon Sep 17 00:00:00 2001 From: mutlusun Date: Tue, 2 Jun 2026 21:30:51 +0200 Subject: [PATCH] chore(editorconfig): add editorconfig and apply to files --- .editorconfig | 19 ++ .github/ISSUE_TEMPLATE/bug_report.md | 5 +- .github/workflows/pyinstaller.yml | 2 +- AGENTS.md | 2 +- CHANGELOG.md | 14 +- README.md | 34 +- cli_test.bat | 2 +- environments/requirements_win_cpu.txt | 4 +- environments/requirements_win_cuda.txt | 8 +- faster-whisper-test.py | 4 +- generate_linux_binary.sh | 2 +- models/fast/NOSCRIBE_README.txt | 4 +- models/precise/NOSCRIBE_README.txt | 4 +- noScribe/CTkToolTips.py | 20 +- noScribe/main.py | 442 ++++++++++++------------- noScribe/pyannote_mp_worker.py | 6 +- noScribe/tkHyperlinkManager.py | 2 +- noScribe/utils.py | 2 +- noScribe/whisper_mp_worker.py | 12 +- noScribeEdit/readme.txt | 2 +- prompts/prompt.yml | 2 +- pyannote/README.md | 10 +- pyannote/config.yaml | 4 +- pyannote/plda/README.md | 2 +- pyinstaller/noScribe_win.spec | 18 +- pyinstaller/nsis_template.txt | 10 +- pyinstaller/win_build.py | 22 +- tests/audio/test_convert.py | 2 +- trans/noScribe.de.yml | 20 +- trans/noScribe.en.yml | 36 +- trans/noScribe.es.yml | 20 +- trans/noScribe.fr.yml | 18 +- trans/noScribe.it.yml | 24 +- trans/noScribe.ja.yml | 16 +- trans/noScribe.pt.yml | 22 +- trans/noScribe.ru.yml | 24 +- trans/noScribe.zh-CN.yml | 18 +- 37 files changed, 438 insertions(+), 420 deletions(-) create mode 100644 .editorconfig diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 00000000..98e30e50 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,19 @@ +# EditorConfig is awesome: https://editorconfig.org + +# top-most EditorConfig file +root = true + +# Applies to all files. +[*] +# Insert final newline +insert_final_newline = true +# Use spaces for indentation +indent_style = space +# Delete trailing whitespace +trim_trailing_whitespace = true + +[*.md] +indent_size = 2 + +[*.py] +indent_size = 4 diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 335034f0..c5cd91da 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -4,11 +4,10 @@ about: Create a report to help us improve title: '' labels: '' assignees: '' - --- **Describe the bug** -A clear and concise description of what the bug is and under which conditions it appears. Also, copy and paste the messages from the main window. +A clear and concise description of what the bug is and under which conditions it appears. Also, copy and paste the messages from the main window. **Screenshots** If applicable, add screenshots to help explain your problem. @@ -16,5 +15,5 @@ If applicable, add screenshots to help explain your problem. **System (please complete the following information):** - OS: [e.g. Windows 11/Linux Mint/macOS Sonoma] - On Windows: Normal or CUDA version? - - noScribe Version: [e.g. 0.6] + - noScribe Version: [e.g. 0.6] - noScribe Settings: [Speaker identification on or off? Quality setting? etc.] diff --git a/.github/workflows/pyinstaller.yml b/.github/workflows/pyinstaller.yml index 5836d907..84922a6d 100644 --- a/.github/workflows/pyinstaller.yml +++ b/.github/workflows/pyinstaller.yml @@ -48,7 +48,7 @@ jobs: # You can test your matrix by printing the current Python version - name: Display Python version run: python -c "import sys; print(sys.version)" - + - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/AGENTS.md b/AGENTS.md index 3c1b3abd..ae27b35c 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1 +1 @@ -Module of the app is in "noScribe/" module directory. \ No newline at end of file +Module of the app is in "noScribe/" module directory. diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d23a9b0..01846058 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,10 +22,10 @@ - improved speaker detection: number of speakers can be defined beforehand - reduced hallucination and looping by adding a VAD filter - CUDA support now non-beta -- small fixes with hebrew language setting, chinese UI locale, requirements file, etc. +- small fixes with hebrew language setting, chinese UI locale, requirements file, etc. ## version 0.4.5: -- Windows: beta version to test CUDA support (acceleration with NVIDIA graphics cards) +- Windows: beta version to test CUDA support (acceleration with NVIDIA graphics cards) ## version 0.4.2: - MacOS: Solves a bug where speaker-detection would become unreliable with MPS-acceleration (a switch to torchaudio 2.1.0 rectified this). @@ -33,7 +33,7 @@ ## version 0.4.1: - Windows: bugfix, rectifies a problem in combination with NVIDIA graphics cards -- macOS: First beta release. Solved a bug with macOS Sonoma where noScribe would not react to the mouse. +- macOS: First beta release. Solved a bug with macOS Sonoma where noScribe would not react to the mouse. ## version 0.4 beta: - much improved **speaker detection/separation** @@ -43,16 +43,16 @@ - new **noScribe Editor** app to check and correct transcripts (no MS Word-Macros anymore) - noScribe now outputs an **HTML-file** which can be opened in every major word editor (MS Word, LibreOffice, OpenOffice...) or QDA-software package - many changes under the hood to prepare for an upcoming macOS-version and improve reliability and quality of the transcription -- switched from "whisper.cpp" to "faster-whisper" as the basic framework (mainly because of the more precise timestamps) +- switched from "whisper.cpp" to "faster-whisper" as the basic framework (mainly because of the more precise timestamps) - macOS: First alpha release ## version 0.3: **new:** - Translations of the user interface into Spanish, French, Italian, Japanese, Portuguese, Russian, Chinese. Thank you, [mlynar-czyk]( https://github.com/mlynar-czyk), for this contribution! Be aware: These translations have been generated with a clever use of chatGPT. Please report any errors that you will find and make – if possible – a pull request with a better translation. - Added hyperlinks to the main window. You can now open the finished transcript directly by clicking on the filename in the log. -- Improved speaker identification, especially in situations with quick changes (by reducing "max-len" in whisper to 30). +- Improved speaker identification, especially in situations with quick changes (by reducing "max-len" in whisper to 30). - Installer now runs without admin rights. You should be able to install noScribe on a computer where you don’t have administrator privileges (i.e., because the machine is managed by the IT-department of your university). Thanks you, [BabyFnord](https://github.com/BabyFnord), for this suggestion! - + **fixes:** - To solve the problem described in issue https://github.com/kaixxx/noScribe/issues/2 (transcription failing with error 3221225794 or 3221225501), I have now included a version of whisper.cpp that supports older hardware (non AVX2). NoScribe selects automatically which version to use. Be aware though that using such old hardware will result in a very slow transcription. - Corrected UTF-8 encoding error that resulted in a failing transcription in some languages (i.e., Japanese, Hungarian). Thank you to the two people reporting this problem via e-mail! @@ -61,5 +61,5 @@ - fixed: Funny mistake in readme ("sensible data" instead of "sensitive data"). Thanks [TheOnlyWayUp](https://github.com/TheOnlyWayUp)! -## version 0.2b: +## version 0.2b: - initial beta release diff --git a/README.md b/README.md index 0cfd1e2d..0da941c2 100644 --- a/README.md +++ b/README.md @@ -8,16 +8,16 @@ > > 🌐 Available in **English, German, Spanish, Italian, and Dutch**. > -> Please update your links. +> Please update your links. --- > [!WARNING] -> Somebody has registered the domain **noscribe(dot)ai** to sell transcription services. **Stay away from this platform, I have nothing to do with it.** The real noScribe is free and always will be. This is obviously an attempt to profit from the popularity of my software and the reputation it gained over the years. Very sad. +> Somebody has registered the domain **noscribe(dot)ai** to sell transcription services. **Stay away from this platform, I have nothing to do with it.** The real noScribe is free and always will be. This is obviously an attempt to profit from the popularity of my software and the reputation it gained over the years. Very sad. ## What is noScribe? - An app to produce **high quality transcripts of interviews** for qualitative social research or journalistic use -- noScribe is **free and open source** ([GPL-3.0](https://www.gnu.org/licenses/gpl-3.0.html)), available for Windows, MacOS and Linux +- noScribe is **free and open source** ([GPL-3.0](https://www.gnu.org/licenses/gpl-3.0.html)), available for Windows, MacOS and Linux - It runs **completely locally** on your computer, protecting the confidentiality of your interviews. No cloud, no worries - It can distinguish between different **speakers** and understands around 60 languages (more or less, see below) - It includes a **nice editor** to review, verify and correct the resulting transcript @@ -29,8 +29,8 @@ (The transcript is from [this interview](https://www.youtube.com/watch?v=vOwajAbvPzQ&t=2018s) which I did in May 2022 with the Russian sociologist Natalia Savelyeva.) ## Limitations -- The download is quite large (several gigabytes) due to the included AI models. -- Beware that a one hour interview can take up to three hours to transcribe, depending on your machine. +- The download is quite large (several gigabytes) due to the included AI models. +- Beware that a one hour interview can take up to three hours to transcribe, depending on your machine. - Poor audio and background noise will lead to poor transcription results. - No automatic transcription is perfect, there will always be some manual revision necessary. Use the [included Editor](#noscribeedit) to check your transcripts thoroughly. (See also ["Factors Influencing the Quality"](#factors-influencing-the-quality-of-the-transcription) and ["Known Issues"](#known-issues) below.) @@ -43,7 +43,7 @@ The [urban dictionary](https://www.urbandictionary.com/define.php?term=Scribe) d **Kai Dröge**, PhD in sociology (with a background in computer science), qualitative researcher and teacher, [Lucerne University for Applied Science (Switzerland)](https://www.hslu.ch/de-ch/hochschule-luzern/ueber-uns/personensuche/profile/?pid=823) and [Institute for Social Research, Frankfurt/M. (Germany)](https://www.ifs.uni-frankfurt.de/personendetails/kai-droege.html). ## Donate -NoScribe is free and always will be. However, developing it costs real money. I have purchased hardware for testing and pay Apple annually for a developer ID. If you would like to support this project, you can make a donation on Ko-Fi. Thanks! +NoScribe is free and always will be. However, developing it costs real money. I have purchased hardware for testing and pay Apple annually for a developer ID. If you would like to support this project, you can make a donation on Ko-Fi. Thanks! [![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/noscribe) @@ -57,15 +57,15 @@ NoScribe is free and always will be. However, developing it costs real money. I Click to expand - **Download:** - - The **general purpose version** for normal PCs without an NVIDIA graphics card: [https://drive.switch.ch/index.php/s/EIVup04qkSHb54j?path=%2FnoScribe%20vers.%200.7%2FWindows%2Fnormal](https://drive.switch.ch/index.php/s/EIVup04qkSHb54j?path=%2FnoScribe%20vers.%200.7%2FWindows%2Fnormal) + - The **general purpose version** for normal PCs without an NVIDIA graphics card: [https://drive.switch.ch/index.php/s/EIVup04qkSHb54j?path=%2FnoScribe%20vers.%200.7%2FWindows%2Fnormal](https://drive.switch.ch/index.php/s/EIVup04qkSHb54j?path=%2FnoScribe%20vers.%200.7%2FWindows%2Fnormal) - A special version using **CUDA acceleration on NVIDIA graphics cards** with at least 6 GB of VRAM: [https://drive.switch.ch/index.php/s/EIVup04qkSHb54j?path=%2FnoScribe%20vers.%200.7%2FWindows%2Fcuda](https://drive.switch.ch/index.php/s/EIVup04qkSHb54j?path=%2FnoScribe%20vers.%200.7%2FWindows%2Fcuda). Make sure that your NVIDIA drivers are on version 570.65 or higher. You must also install the [CUDA toolkit from here](https://developer.nvidia.com/cuda-downloads?target_os=Windows) (a reboot is required afterwards). -- **Installation**: +- **Installation**: - Start the downloaded setup file. This may take a while, be patient. - If you get a warning that "Windows protected your PC" and the app comes from an "Unknown publisher", you have to trust us and click "Run anyway" - To do a silent install on a larger group of computers, start the setup with the argument `/S`. - **Known Issues:** - - It seems that the RTX/GTX 1XXX generation of cards is no longer supported in CUDA. Use the normal version instead. - - If you receive the following error message: "Transcription worker exited unexpectedly (code 3221226505)," try forcing the use of the CPU for transcription instead of the graphics card. This method is slower but more reliable. To do so, follow these steps: Close noScribe. Open the file `C:\Users\\AppData\Local\noScribe\noScribe\config.yml` in a text editor. Change the value for `force_whisper_cpu` to `'True'`. Don't forget the quotation marks around 'True'. Now, restart NoScribe. + - It seems that the RTX/GTX 1XXX generation of cards is no longer supported in CUDA. Use the normal version instead. + - If you receive the following error message: "Transcription worker exited unexpectedly (code 3221226505)," try forcing the use of the CPU for transcription instead of the graphics card. This method is slower but more reliable. To do so, follow these steps: Close noScribe. Open the file `C:\Users\\AppData\Local\noScribe\noScribe\config.yml` in a text editor. Change the value for `force_whisper_cpu` to `'True'`. Don't forget the quotation marks around 'True'. Now, restart NoScribe. @@ -174,7 +174,7 @@ Dröge, K. (2025). noScribe. AI-powered Audio Transcription (Version XXX) [Compu - **Mark Pause**: If enabled, parts of your audio without voice activity will be marked as pauses. Pauses are transcribed as round brackets with one dot per second inside, e.g., "(..)" for a two-second pause. Pauses longer than 10 seconds are written out as "(XX seconds pause)" or "(XX minutes pause)". You have the option to mark either pauses of one second and more ("1sec+"), two seconds and more ("2sec+"), or only the longer ones of three seconds and more ("3sec+"). Choose "none" to disable this feature entirely. - **Speaker Detection:** This feature uses the Pyannote AI model to identify distinct speakers in your audio and organizes the transcript accordingly. Choose the number of speakers if known, or select "auto." Opting for "none" bypasses this step altogether, reducing the processing time by approximately half. However, the resultant transcript will be a continuous block of text without any indicators of speaker transitions. - **Overlapping Speech**: If enabled, noScribe attempts to mark instances where two people speak simultaneously. The overlapping section is demarcated with //double slashes//. (Note: This is an experimental feature.) -- **Disfluencies**: If enabled, common speech disfluencies like filler words ("um"), unfinished words or sentences, etc. will also be transcribed. Note that this is not a hard on/off switch, but more of a 'recommendation' for the transcription AI model which only works to some extent. +- **Disfluencies**: If enabled, common speech disfluencies like filler words ("um"), unfinished words or sentences, etc. will also be transcribed. Note that this is not a hard on/off switch, but more of a 'recommendation' for the transcription AI model which only works to some extent. - **Timestamps**: When enabled, noScribe incorporates timestamps in the format [hh:mm:ss] into the transcript either at every change of speaker or every 60 seconds. I find these timestamps somewhat distracting, hence my decision to disable them by default. However, they can be quite useful in certain contexts. Even with timestamps disabled, determining the audio timecode for a specific segment is straightforward: simply open the transcript in the noScribe Editor, navigate through the text, and the corresponding timecode will appear in the bottom right corner of the app. ### Transcription process @@ -186,18 +186,18 @@ Dröge, K. (2025). noScribe. AI-powered Audio Transcription (Version XXX) [Compu - By default, noScribe produces an HTML-file. This can be opened in every common word editor (including MS Word, LibreOffice) or QDA-package (MAXQDA, ATLAS.ti, QualCoder...). - Before working with the transcript though, you should check it with the included editor. There will always be some errors. -### Batch transcription +### Batch transcription (new in version 0.7) - The "Queue" tab in the main window shows a list of all jobs as well as their state and progress. - If you start a new job while another is still running, the new job will wait in the queue to be processed afterwards. -- To start multiple jobs at once with the same settings, select as many files as you want in the audio file dialog. The output files will be named automatically. Use the "Save transcript as" dialog to select a different output folder if needed. Otherwise, the transcripts will be stored in the same folders as the audio. +- To start multiple jobs at once with the same settings, select as many files as you want in the audio file dialog. The output files will be named automatically. Use the "Save transcript as" dialog to select a different output folder if needed. Otherwise, the transcripts will be stored in the same folders as the audio. - The job buttons: - `X` Deletes a job from the list or cancels a running one. - `✔` Opens the transcript in the included editor. This also works for unfinished transcripts in case of an error or if the job was canceled by the user. - - `⟲` Restarts the job (only available in case of errors or cancelation). + - `⟲` Restarts the job (only available in case of errors or cancelation). ## noScribeEdit The included editor to check the final transcript. @@ -224,9 +224,9 @@ The source code of the editor can be found here: [https://github.com/kaixxx/noSc ## Known Issues - The output of this software always needs to be checked for quality, misunderstandings, and wrong speaker diarization. This software is based on [OpenAI's Whisper model](https://github.com/openai/whisper). Typical word error rates can be seen [here](https://github.com/openai/whisper?tab=readme-ov-file#available-models-and-languages). See also [this paper](https://doi.org/10.1145/3576915.3624380) for a comparison of different transcription services and their errors. -- Like any other large language model, the whisper model can sometimes **hallucinate**. This is especially prevalent in silent audio passages or when background noise is treated as "text" (see [this study from the Cornell University](https://facctconference.org/static/papers24/facct24-111.pdf) for more info about the issue). We use voice activity detection (VAD) to filter out sections without speech as best as possible. +- Like any other large language model, the whisper model can sometimes **hallucinate**. This is especially prevalent in silent audio passages or when background noise is treated as "text" (see [this study from the Cornell University](https://facctconference.org/static/papers24/facct24-111.pdf) for more info about the issue). We use voice activity detection (VAD) to filter out sections without speech as best as possible. More severely, users also reported cases where words were hallucinated that would fit syntactically into the context, but were actually not present in the original audio. Such errors are especially hard to catch. -- **Names of people, places or organizations** are often transcribed with errors. +- **Names of people, places or organizations** are often transcribed with errors. - The whisper AI can sometimes get **stuck in a loop of repeating text,** especially on longer audio files. If this happens, try to transcribe shorter sections (using the "Start" and "Stop" fields in noScribe), and join them manually. - **Multilingual audio** is now supported, but experimental. Sometimes it can happen that words in other languages than the main language are translated. - **Nonverbal expressions** like laughter are not included in the transcript and must be added later in the editor if you need them. @@ -238,7 +238,7 @@ The source code of the editor can be found here: [https://github.com/kaixxx/noSc ## Advanced Options -- NoScribe now also includes a command line interface, ideal for scripting. Type in `noScribe.exe --help` for more information. You may also want to use the `--no-gui` option in scripting scenarios. +- NoScribe now also includes a command line interface, ideal for scripting. Type in `noScribe.exe --help` for more information. You may also want to use the `--no-gui` option in scripting scenarios. - Config file: After the app has run for the first time, you will find a file named `config.yml` in the user config directory (on Windows: `C:\Users\\AppData\Local\noScribe\noScribe\config.yml`; on Mac OS: `~/Library/Application Support/noscribe/config.yml`; on Linux: `~/.config/noScribe/config.yml`). Here, you can change a few **extra settings**, e.g., the language of the user interface and model parameters. - Also in the user config directory you will find a folder named `log` with detailed log-files for every transcript (also unfinished ones). This can be helpful in the case of any errors. Be aware though that these files also contain the text of your transcripts which might include sensitive information. - If you want to use **custom whisper models** with noScribe, follow the [instructions in the Wiki](https://github.com/kaixxx/noScribe/wiki/Add-custom-Whisper-models-for-transcription). diff --git a/cli_test.bat b/cli_test.bat index 8968f049..4a1528f9 100644 --- a/cli_test.bat +++ b/cli_test.bat @@ -1,3 +1,3 @@ @echo off & chcp 65001 >nul -python -m noScribe "C:\Users\kai\Documents\Programmierung\2023_WhisperTranscribe\Intw\Ein Gespräch mit Heikedine Körting kurz.mp3" "C:\Users\kai\Documents\Programmierung\2023_WhisperTranscribe\Intw\t.html" \ No newline at end of file +python -m noScribe "C:\Users\kai\Documents\Programmierung\2023_WhisperTranscribe\Intw\Ein Gespräch mit Heikedine Körting kurz.mp3" "C:\Users\kai\Documents\Programmierung\2023_WhisperTranscribe\Intw\t.html" diff --git a/environments/requirements_win_cpu.txt b/environments/requirements_win_cpu.txt index eb39fc07..e177f6a6 100644 --- a/environments/requirements_win_cpu.txt +++ b/environments/requirements_win_cpu.txt @@ -9,8 +9,8 @@ CTkToolTip faster-whisper Pillow pyannote.audio>=4.0 -pyinstaller=6.14.1 +pyinstaller=6.14.1 # The bootloader in the most recent version of pyinstaller is often falsely detected as malware -# by anti virus software, so, it's safer to use a version that has been released a few months ago. +# by anti virus software, so, it's safer to use a version that has been released a few months ago. python-i18n PyYAML diff --git a/environments/requirements_win_cuda.txt b/environments/requirements_win_cuda.txt index 88c0f2a4..97f6780e 100644 --- a/environments/requirements_win_cuda.txt +++ b/environments/requirements_win_cuda.txt @@ -2,7 +2,7 @@ --extra-index-url https://pypi.org/simple # default for everything else torch==2.8 # torch 2.8 with CUDA 12.8 (pyannote 4.0 does not work with torch 2.9) -torchaudio==2.8 +torchaudio==2.8 AdvancedHTMLParser appdirs cpufeature @@ -11,8 +11,8 @@ CTkToolTip faster-whisper Pillow pyannote.audio>=4.0 -pyinstaller==6.14.1 +pyinstaller==6.14.1 # The bootloader in the most recent version of pyinstaller is often falsely detected as malware -# by anti virus software, so, it's safer to use a version that has been released a few months ago. +# by anti virus software, so, it's safer to use a version that has been released a few months ago. python-i18n -PyYAML \ No newline at end of file +PyYAML diff --git a/faster-whisper-test.py b/faster-whisper-test.py index da0e03bc..82301aa8 100644 --- a/faster-whisper-test.py +++ b/faster-whisper-test.py @@ -19,7 +19,7 @@ def format_timestamp(seconds: float, always_include_hours: bool = True, decimal_ return ( f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}" ) - + model_size = "large-v2" # Run on GPU with FP16 @@ -35,4 +35,4 @@ def format_timestamp(seconds: float, always_include_hours: bool = True, decimal_ print("Detected language '%s' with probability %f" % (info.language, info.language_probability)) for segment in segments: - print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text)) \ No newline at end of file + print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text)) diff --git a/generate_linux_binary.sh b/generate_linux_binary.sh index 185fca37..ab468fa2 100644 --- a/generate_linux_binary.sh +++ b/generate_linux_binary.sh @@ -1,4 +1,4 @@ #!/bin/bash docker buildx build --platform linux/amd64 -t noscribe . -docker run --rm -it --mount type=bind,src=.,dst=/usr/src/app noscribe pyinstaller noScribe-linux.spec \ No newline at end of file +docker run --rm -it --mount type=bind,src=.,dst=/usr/src/app noscribe pyinstaller noScribe-linux.spec diff --git a/models/fast/NOSCRIBE_README.txt b/models/fast/NOSCRIBE_README.txt index 51ebf7d9..f7d71ab2 100644 --- a/models/fast/NOSCRIBE_README.txt +++ b/models/fast/NOSCRIBE_README.txt @@ -1,6 +1,6 @@ -The "fast" setting uses Whisper v3 turbo also, but in an int8 quantization. In my testing on CPU, this lead to about 30% faster speed. +The "fast" setting uses Whisper v3 turbo also, but in an int8 quantization. In my testing on CPU, this lead to about 30% faster speed. Download all the files from here into this folder: https://huggingface.co/mukowaty/faster-whisper-int8/tree/main/faster-whisper-large-v3-turbo-int8 -If the files are named like so: "faster-whisper-large-v3-turbo-int8_config.json", please remove the prefix "faster-whisper-large-v3-turbo-int8_". The resulting file in this example must be named "config.json". Do this to all files in the folder. \ No newline at end of file +If the files are named like so: "faster-whisper-large-v3-turbo-int8_config.json", please remove the prefix "faster-whisper-large-v3-turbo-int8_". The resulting file in this example must be named "config.json". Do this to all files in the folder. diff --git a/models/precise/NOSCRIBE_README.txt b/models/precise/NOSCRIBE_README.txt index 8960f4b8..4565f7b7 100644 --- a/models/precise/NOSCRIBE_README.txt +++ b/models/precise/NOSCRIBE_README.txt @@ -1,4 +1,4 @@ -The "precise" setting uses Whisper v3 turbo. +The "precise" setting uses Whisper v3 turbo. Download all the files from here into this folder: -https://huggingface.co/mobiuslabsgmbh/faster-whisper-large-v3-turbo \ No newline at end of file +https://huggingface.co/mobiuslabsgmbh/faster-whisper-large-v3-turbo diff --git a/noScribe/CTkToolTips.py b/noScribe/CTkToolTips.py index 91b3a751..373ad229 100644 --- a/noScribe/CTkToolTips.py +++ b/noScribe/CTkToolTips.py @@ -20,7 +20,7 @@ def __init__(self, widget, text='widget info', fg_color=None): :param widget: The widget object to assign the tooltip to. :param text: The text to be displayed as the tooltip. - :param fg_color: Hex colour code (#RRGGBB), defining the colour of the tooltip. + :param fg_color: Hex colour code (#RRGGBB), defining the colour of the tooltip. """ if fg_color is None: self.fg_color = self.get_color_from_name('CTkFrame', 'fg_color') @@ -35,7 +35,7 @@ def __init__(self, widget, text='widget info', fg_color=None): self.y_offset: int = +10 self.corner_radius: int = 10 self.border_width: int = 1 - self.padding: tuple = (10, 2) + self.padding: tuple = (10, 2) # Bind to the primary widget; use add="+" to avoid clobbering existing bindings self._widget.bind("", self.on_enter, add="+") self._widget.bind("", self.on_leave, add="+") @@ -73,7 +73,7 @@ def on_enter(self, event=None): def on_leave(self, event=None): self._unschedule() self.hide_tooltip() - + def set_text(self, text): self.text = text @@ -94,7 +94,7 @@ def show_tooltip(self, event=None): # Leaves only the label and removes the app window self._tw.wm_overrideredirect(True) - + if sys.platform.startswith("win"): self._tw.transparent_color = self._widget._apply_appearance_mode( ThemeManager.theme["CTkToplevel"]["fg_color"]) @@ -112,11 +112,11 @@ def show_tooltip(self, event=None): # self.resizable(width=True, height=True) # Make the background transparent - self._tw.config(background=self._tw.transparent_color) - + self._tw.config(background=self._tw.transparent_color) + #self._tw.wm_attributes("-transparentcolor", "white") # Set transparent color # self._tw.wm_geometry("+%d+%d" % (x, y)) - + # create frame and label self.frame = ctk.CTkFrame( self._tw, @@ -134,7 +134,7 @@ def show_tooltip(self, event=None): ) # Pack the frame into the Toplevel so the label becomes visible self.frame.pack(fill="both", expand=True) - + # Determine current pointer position for robust placement (works with synthetic events) try: pointer_x, pointer_y = self._widget.winfo_pointerxy() @@ -157,7 +157,7 @@ def show_tooltip(self, event=None): # Position the tooltip near the pointer self._tw.geometry(f"+{pointer_x + offset_x}+{pointer_y + self.y_offset}") - + # label = tk.Label(self._tw, text=self._text, justify='left', fg=self._fg_color, # bg=self._bg_colour, relief='solid', borderwidth=1, # wraplength=self._wrap_length) @@ -168,7 +168,7 @@ def hide_tooltip(self): self._tw = None if tw: tw.destroy() - + @staticmethod def get_color_from_name(widget, name: str): """Gets the colour code associated with the supplied widget property, diff --git a/noScribe/main.py b/noScribe/main.py index e78d0dd2..b4410900 100644 --- a/noScribe/main.py +++ b/noScribe/main.py @@ -95,11 +95,11 @@ - @@ -181,10 +181,10 @@ with open(config_file, 'r') as file: config = yaml.safe_load(file) if not config: - raise # config file is empty (None) + raise # config file is empty (None) except: # seems we run it for the first time and there is no config file config = {} - + def get_config(key: str, default) -> str: """ Get a config value, set it if it doesn't exist """ if key not in config: @@ -216,14 +216,14 @@ def _is_cuda_error_message(message: str) -> bool: return any(keyword in lower_message for keyword in _CUDA_ERROR_KEYWORDS) def version_higher(version1, version2, subversion_level=99) -> int: - """Will return + """Will return 1 if version1 is higher 2 if version2 is higher - 0 if both are equal - - subversion_level: Adjusts how deep suversions are compared. + 0 if both are equal + + subversion_level: Adjusts how deep suversions are compared. If subversion_level = 1, "0.7.3" and "0.7.4" will be equal, because the comparison - stops after the first level of subversions ("0.7"). + stops after the first level of subversions ("0.7"). Default: 99 """ version1_elems = version1.split('.') @@ -243,7 +243,7 @@ def version_higher(version1, version2, subversion_level=99) -> int: break # must be completely equal return 0 - + config['app_version'] = app_version def save_config(): @@ -299,7 +299,7 @@ class JobStatus(Enum): class TranscriptionJob: """Represents a single transcription job with all its parameters and status""" - + def __init__(self): # Status tracking self.status: JobStatus = JobStatus.WAITING @@ -308,31 +308,31 @@ def __init__(self): self.created_at: datetime.datetime = datetime.datetime.now() self.started_at: Optional[datetime.datetime] = None self.finished_at: Optional[datetime.datetime] = None - + # Progress tracking self.progress: float = 0.0 # Progress from 0.0 to 1.0 - + # File paths self.audio_file: str = '' self.transcript_file: str = '' # Partial transcript tracking self.has_partial_transcript: bool = False - + # Time range self.start: int = 0 # milliseconds self.stop: int = 0 # milliseconds (0 means until end) - + # Language and model settings self.language_name: str = 'Auto' self.whisper_model: transcription.WhisperModel = None - + # Processing options self.speaker_detection: str = 'auto' self.overlapping: bool = True self.timestamps: bool = False self.disfluencies: bool = True self.pause: int = 0 # index value (0=none, 1=1sec+, etc.) - + # Config-based options self.whisper_beam_size: int = 1 self.whisper_temperature: float = 0.0 @@ -341,22 +341,22 @@ def __init__(self): self.timestamp_color: str = '#78909C' self.pause_marker: str = '.' self.auto_save: bool = True - self.whisper_xpu: str = 'cpu' + self.whisper_xpu: str = 'cpu' self.vad_threshold: float = 0.5 - + # Derived properties self.file_ext: str = '' - + def set_running(self): """Mark job as running and record start time""" self.status = JobStatus.AUDIO_CONVERSION self.started_at = datetime.datetime.now() - + def set_finished(self): """Mark job as finished and record completion time""" self.status = JobStatus.FINISHED self.finished_at = datetime.datetime.now() - + def set_error(self, error_message: str, error_tb: str = ''): """Mark job as failed and store error message""" self.status = JobStatus.ERROR @@ -369,7 +369,7 @@ def set_canceled(self, message: Optional[str] = None): self.status = JobStatus.CANCELED self.error_message = message self.finished_at = datetime.datetime.now() - + def get_duration(self) -> Optional[datetime.timedelta]: """Get processing duration if job is completed""" if self.started_at and self.finished_at: @@ -452,30 +452,30 @@ def yn(v: bool) -> str: pass return "\n".join([ln for ln in lines if ln]) - + class TranscriptionQueue: """Manages a queue of transcription jobs""" - + def __init__(self): self.jobs: list[TranscriptionJob] = [] self.current_job: Optional[TranscriptionJob] = None # Track currently running job - + def add_job(self, job: TranscriptionJob): """Add a job to the queue""" self.jobs.append(job) - + def get_waiting_jobs(self) -> list[TranscriptionJob]: """Get all jobs with WAITING status""" return [job for job in self.jobs if job.status == JobStatus.WAITING] - + def get_running_jobs(self) -> list[TranscriptionJob]: """Get all jobs currently being processed""" return [job for job in self.jobs if job.status in [JobStatus.AUDIO_CONVERSION, JobStatus.SPEAKER_IDENTIFICATION, JobStatus.TRANSCRIPTION, JobStatus.CANCELING]] - + def get_finished_jobs(self) -> list[TranscriptionJob]: """Get all successfully completed jobs""" return [job for job in self.jobs if job.status == JobStatus.FINISHED] - + def get_failed_jobs(self) -> list[TranscriptionJob]: """Get all jobs that encountered errors""" return [job for job in self.jobs if job.status == JobStatus.ERROR] @@ -483,20 +483,20 @@ def get_failed_jobs(self) -> list[TranscriptionJob]: def get_canceled_jobs(self) -> list[TranscriptionJob]: """Get all jobs that were canceled by the user""" return [job for job in self.jobs if job.status == JobStatus.CANCELED] - + def has_pending_jobs(self) -> bool: """Check if there are jobs waiting to be processed""" return len(self.get_waiting_jobs()) > 0 - + def is_running(self) -> bool: """Check if any job are currently beeing processed""" return len(self.get_running_jobs()) > 0 - + def get_next_waiting_job(self) -> Optional[TranscriptionJob]: """Get the next job to process""" waiting_jobs = self.get_waiting_jobs() return waiting_jobs[0] if waiting_jobs else None - + def get_queue_summary(self) -> dict: """Get summary statistics of the queue""" return { @@ -507,11 +507,11 @@ def get_queue_summary(self) -> dict: 'errors': len(self.get_failed_jobs()), 'canceled': len(self.get_canceled_jobs()), } - + def is_empty(self) -> bool: """Check if queue is empty""" return len(self.jobs) == 0 - + def has_output_conflict(self, transcript_file: str, ignore_job: Optional[TranscriptionJob] = None) -> bool: """Check if another queue job uses the same output file. Ignores jobs in ERROR, CANCELING, CANCELED and optionally a given job.""" @@ -544,7 +544,7 @@ def confirm_output_override(self, transcript_file: str, ignore_job: Optional[Tra except Exception: pass return True - + # Command Line Interface @@ -553,12 +553,12 @@ def create_transcription_job(audio_file=None, transcript_file=None, start_time=N overlapping=None, timestamps=None, disfluencies=None, pause=None, cli_mode=False) -> TranscriptionJob: """Create a TranscriptionJob with all default values - + This function handles both CLI and GUI job creation, ensuring all defaults are consistent between both modes. """ job = TranscriptionJob() - + # File paths job.audio_file = audio_file or '' job.transcript_file = transcript_file or '' @@ -566,11 +566,11 @@ def create_transcription_job(audio_file=None, transcript_file=None, start_time=N job.file_ext = os.path.splitext(job.transcript_file)[1][1:] if not job.file_ext in ['html', 'txt', 'vtt']: raise Exception(t('err_unsupported_output_format', file_type=job.file_ext)) - + # Time range job.start = start_time if start_time is not None else 0 job.stop = stop_time if stop_time is not None else 0 - + # Language - handle both language names and codes if language_name: if language_name in languages.values(): @@ -583,16 +583,16 @@ def create_transcription_job(audio_file=None, transcript_file=None, start_time=N raise ValueError(f"Unknown language: {language_name}") else: job.language_name = 'Auto' - + # Model (will be validated later when we have access to the app instance) job.whisper_model = whisper_model_name or 'precise' - + # Processing options with defaults job.speaker_detection = speaker_detection if speaker_detection is not None else 'auto' job.overlapping = overlapping if overlapping is not None else True job.timestamps = timestamps if timestamps is not None else False job.disfluencies = disfluencies if disfluencies is not None else True - + # Pause setting if pause is not None: if isinstance(pause, str): @@ -605,7 +605,7 @@ def create_transcription_job(audio_file=None, transcript_file=None, start_time=N job.pause = pause else: job.pause = 1 # default to '1sec+' - + # Config-based options (use defaults from config) job.whisper_beam_size = get_config('whisper_beam_size', 1) job.whisper_temperature = get_config('whisper_temperature', 0.0) @@ -614,11 +614,11 @@ def create_transcription_job(audio_file=None, transcript_file=None, start_time=N job.timestamp_color = get_config('timestamp_color', '#78909C') job.pause_marker = get_config('pause_seconds_marker', '.') job.auto_save = False if get_config('auto_save', 'True') == 'False' else True - + job.vad_threshold = float(get_config('voice_activity_detection_threshold', '0.5')) - + # Platform-specific XPU settings - """ + """ if platform.system() == "Darwin": # MAC xpu = get_config('pyannote_xpu', 'mps' if platform.mac_ver()[0] >= '12.3' else 'cpu') job.pyannote_xpu = 'mps' if xpu == 'mps' else 'cpu' @@ -633,8 +633,8 @@ def create_transcription_job(audio_file=None, transcript_file=None, start_time=N job.whisper_xpu = 'cuda' if whisper_xpu == 'cuda' else 'cpu' else: raise Exception('Platform not supported yet.') - """ - + """ + # Check for invalid VTT options if job.file_ext == 'vtt' and (job.pause > 0 or job.overlapping or job.timestamps): if cli_mode: @@ -642,7 +642,7 @@ def create_transcription_job(audio_file=None, transcript_file=None, start_time=N job.pause = 0 job.overlapping = False job.timestamps = False - + return job def create_job_from_cli_args(args) -> TranscriptionJob: @@ -679,17 +679,17 @@ def parse_cli_args(): python -m noScribe --help-models # Show available models """ ) - + # Special argument to show available models parser.add_argument('--help-models', action='store_true', help='Show available Whisper models and exit') - + # Required arguments (when not using --help-models) parser.add_argument('audio_file', nargs='?', help='Input audio file path') - parser.add_argument('output_file', nargs='?', + parser.add_argument('output_file', nargs='?', help='Output transcript file path (.html, .txt, or .vtt)') - + # Optional arguments parser.add_argument('--no-gui', action='store_true', default=False, help='Run without showing the GUI (headless mode)') @@ -703,7 +703,7 @@ def parse_cli_args(): help='Whisper model to use (use --help-models to see available models)') parser.add_argument('--speaker-detection', choices=['none', 'auto', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10'], default=None, help='Speaker detection/diarization setting') - parser.add_argument('--overlapping', action='store_true', default=None, + parser.add_argument('--overlapping', action='store_true', default=None, help='Enable overlapping speech detection') parser.add_argument('--no-overlapping', action='store_false', dest='overlapping', default=None, help='Disable overlapping speech detection') @@ -717,7 +717,7 @@ def parse_cli_args(): help='Exclude disfluencies from transcript') parser.add_argument('--pause', choices=['none', '1sec+', '2sec+', '3sec+'], default=None, help='Mark pauses in transcript') - + return parser.parse_args() @@ -750,75 +750,75 @@ def format(self, event): class JobEntryFrame(ctk.CTkFrame, CTkScalingBaseClass): """A custom frame that can display a progress bar as its background with text overlays""" - + def __init__(self, master, progress=0.0, progress_color=None, **kwargs): ctk.CTkFrame.__init__(self, master, **kwargs) CTkScalingBaseClass.__init__(self, scaling_type="widget") if not progress_color: progress_color = ctk.ThemeManager.theme['CTkProgressBar']['progress_color'][1] - + self.progress = progress self.progress_color = progress_color self.base_color = self._fg_color self.show_progress = False # Only show progress during processing - + # Store text content self.name_text = "" self.status_text = "" self.status_color = "lightgray" - + # Create a canvas to draw the progress background and text self.progress_canvas = tk.Canvas(self, highlightthickness=0) self.progress_canvas.place(x=0, y=0, relwidth=1, relheight=1) - + # Forward mouse events from canvas to frame for CTkToolTip functionality self.progress_canvas.bind("", self._on_canvas_enter) self.progress_canvas.bind("", self._on_canvas_leave) - + # Bind to configure event to redraw when size changes self.bind('', self._on_configure) - + # Update the progress display self._update_progress_display() - + def destroy(self): """Override destroy to properly clean up scaling callbacks""" CTkScalingBaseClass.destroy(self) ctk.CTkFrame.destroy(self) - + def set_progress(self, progress, show_progress=True): """Set the progress value (0.0 to 1.0) and whether to show progress bar""" self.progress = max(0.0, min(1.0, progress)) self.show_progress = show_progress self._update_progress_display() - + def set_name_text(self, text): """Set the name text to display""" self.name_text = text self._update_progress_display() - + def set_status_text(self, text, color="lightgray"): """Set the status text and color to display""" self.status_text = text self.status_color = color self._update_progress_display() - + def bind_click(self, callback): """Bind click event to the canvas""" self.progress_canvas.bind("", callback) - + def unbind_click(self): """Unbind click event from the canvas""" self.progress_canvas.unbind("") - + def configure_cursor(self, cursor): """Configure cursor for the canvas""" self.progress_canvas.configure(cursor=cursor) - + def _on_configure(self, event=None): """Handle resize events""" self._update_progress_display() - + def _get_scaled_font_size(self): """Calculate font size based on frame height and use CustomTkinter's scaling""" try: @@ -827,42 +827,42 @@ def _get_scaled_font_size(self): return scaled_font[1] except: return 13 # Fallback - + def _on_canvas_enter(self, event): """Forward canvas Enter event to frame for CTkToolTip""" # Generate a synthetic Enter event for the frame self.event_generate("") - + def _on_canvas_leave(self, event): """Forward canvas Leave event to frame for CTkToolTip""" # Generate a synthetic Leave event for the frame self.event_generate("") - + def _update_progress_display(self): """Update the progress bar display and text""" if not self.progress_canvas.winfo_exists(): return - + # Clear the canvas self.progress_canvas.delete("all") - + # Get canvas dimensions width = self.progress_canvas.winfo_width() height = self.progress_canvas.winfo_height() - + if width <= 1 or height <= 1: # Canvas not ready yet self.after(10, self._update_progress_display) return - + # Calculate button area width to avoid overlap (1 button = 30px + padding) # Reserve space for up to 3 buttons (X, ⟲/✔, ✔) button_area_width = 3 * self._apply_widget_scaling(30 + 5) - + # Draw base background base_color = self.base_color[1] if isinstance(self.base_color, tuple) else self.base_color self.progress_canvas.configure(bg=base_color) - + # Draw progress bar only if show_progress is True and there's progress if self.show_progress and self.progress > 0: progress_width = int((width - button_area_width) * self.progress) @@ -871,10 +871,10 @@ def _update_progress_display(self): fill=self.progress_color, outline="" ) - + # Calculate font size based on screen scaling font_size = self._get_scaled_font_size() - + # Draw text overlays if self.name_text: self.progress_canvas.create_text( @@ -884,7 +884,7 @@ def _update_progress_display(self): fill="lightgray", font=("", font_size) ) - + if self.status_text: # Position status text to avoid button overlap status_x = width - button_area_width - self._apply_widget_scaling(5) @@ -982,7 +982,7 @@ def __init__(self): self.scrollable_options = ctk.CTkScrollableFrame(self.sidebar_frame, width=300, corner_radius=0, fg_color='transparent') self.scrollable_options.pack(padx=0, pady=0, anchor='w', fill='both', expand=True) self.bind('', self.on_resize) # Bind the configure event of options_frame to a check_scrollbar requirement function - + # input audio file self.label_audio_file = ctk.CTkLabel(self.scrollable_options, text=t('label_audio_file')) self.label_audio_file.pack(padx=20, pady=[20,0], anchor='w') @@ -990,9 +990,9 @@ def __init__(self): self.frame_audio_file = ctk.CTkFrame(self.scrollable_options, width=260, height=33, corner_radius=8, border_width=2) self.frame_audio_file.pack(padx=20, pady=[0,10], anchor='w') - self.button_audio_file_name = ctk.CTkButton(self.frame_audio_file, width=200, corner_radius=8, bg_color='transparent', - fg_color='transparent', hover_color=self.frame_audio_file._bg_color, - border_width=0, anchor='w', + self.button_audio_file_name = ctk.CTkButton(self.frame_audio_file, width=200, corner_radius=8, bg_color='transparent', + fg_color='transparent', hover_color=self.frame_audio_file._bg_color, + border_width=0, anchor='w', text=t('label_audio_file_name'), command=self.button_audio_file_event) self.button_audio_file_name.place(x=3, y=3) @@ -1006,9 +1006,9 @@ def __init__(self): self.frame_transcript_file = ctk.CTkFrame(self.scrollable_options, width=260, height=33, corner_radius=8, border_width=2) self.frame_transcript_file.pack(padx=20, pady=[0,10], anchor='w') - self.button_transcript_file_name = ctk.CTkButton(self.frame_transcript_file, width=200, corner_radius=8, bg_color='transparent', - fg_color='transparent', hover_color=self.frame_transcript_file._bg_color, - border_width=0, anchor='w', + self.button_transcript_file_name = ctk.CTkButton(self.frame_transcript_file, width=200, corner_radius=8, bg_color='transparent', + fg_color='transparent', hover_color=self.frame_transcript_file._bg_color, + border_width=0, anchor='w', text=t('label_transcript_file_name'), command=self.button_transcript_file_event) self.button_transcript_file_name.place(x=3, y=3) @@ -1049,8 +1049,8 @@ def __init__(self): self.option_menu_language.set(last_language) else: self.option_menu_language.set('Auto') - - # Whisper Model Selection + + # Whisper Model Selection class CustomCTkOptionMenu(ctk.CTkOptionMenu): # Custom version that reads available models on drop down def __init__(self, noScribe_parent, master, width = 140, height = 28, corner_radius = None, bg_color = "transparent", fg_color = None, button_color = None, button_hover_color = None, text_color = None, text_color_disabled = None, dropdown_fg_color = None, dropdown_hover_color = None, dropdown_text_color = None, font = None, dropdown_font = None, values = None, variable = None, state = tk.NORMAL, hover = True, command = None, dynamic_resizing = True, anchor = "w", **kwargs): @@ -1065,7 +1065,7 @@ def _clicked(self, event=0): self._values.append(t('label_add_custom_models')) self._dropdown_menu.configure(values=self._values) super()._clicked(event) - + def _dropdown_callback(self, value: str): if value == self._values[-2]: # divider return @@ -1086,12 +1086,12 @@ def _dropdown_callback(self, value: str): self.noScribe_parent.logn(f"Failed to open folder: {e}") else: super()._dropdown_callback(value) - + self.label_whisper_model = ctk.CTkLabel(self.frame_options, text=t('label_whisper_model')) self.label_whisper_model.grid(column=0, row=3, sticky='w', pady=5) - self.option_menu_whisper_model = CustomCTkOptionMenu(self, - self.frame_options, + self.option_menu_whisper_model = CustomCTkOptionMenu(self, + self.frame_options, width=100, values=list(self.whisper_models.keys()), dynamic_resizing=False) @@ -1129,7 +1129,7 @@ def _dropdown_callback(self, value: str): self.check_box_overlapping.select() else: self.check_box_overlapping.deselect() - + # Disfluencies self.label_disfluencies = ctk.CTkLabel(self.frame_options, text=t('label_disfluencies')) self.label_disfluencies.grid(column=0, row=7, sticky='w', pady=5) @@ -1153,7 +1153,7 @@ def _dropdown_callback(self, value: str): self.check_box_timestamps.select() else: self.check_box_timestamps.deselect() - + # Start control: single CTkOptionMenu styled like a button # Create a container so we can show/hide as one control self.start_button_container = ctk.CTkFrame(self.sidebar_frame, fg_color='transparent') @@ -1224,15 +1224,15 @@ def _on_text_label_click(self, event): self.start_action_menu = StartActionOptionMenu(self, self.start_button_container) self.start_action_menu.pack(padx=[0,0], fill='x', expand=True) - + # create queue view and log textbox self.frame_right = ctk.CTkFrame(self.frame_main, corner_radius=0, fg_color='transparent') self.frame_right.pack(padx=0, pady=0, fill='both', expand=True, side='top') - + self.tabview = ctk.CTkTabview(self.frame_right, anchor="nw", border_width=0, fg_color='transparent', corner_radius=0) self.tabview.pack(padx=[10,30], pady=[0,30], fill='both', expand=True, side='top') - self.tab_log = self.tabview.add(t("tab_log")) - self.tab_queue = self.tabview.add(t("tab_queue")) + self.tab_log = self.tabview.add(t("tab_log")) + self.tab_queue = self.tabview.add(t("tab_queue")) self.tabview.set(t("tab_log")) # set currently visible tab self.log_frame = ctk.CTkFrame(self.tab_log, fg_color='transparent', border_width=1, corner_radius=0) @@ -1242,14 +1242,14 @@ def _on_text_label_click(self, event): self.log_textbox.tag_config('error', foreground='yellow') self.log_textbox.pack(padx=5, pady=5, expand=True, fill='both') self.log_len = 0 - + self.log_progress_frame = ctk.CTkFrame(self.log_frame, fg_color='transparent') - self.log_progress_frame.pack(padx=10, pady=10, fill='x', expand=False, anchor='center') + self.log_progress_frame.pack(padx=10, pady=10, fill='x', expand=False, anchor='center') self.log_edit_btn = ctk.CTkButton( self.log_progress_frame, text=t('editor_button'), width=100, - fg_color=self.log_textbox._scrollbar_button_color, + fg_color=self.log_textbox._scrollbar_button_color, command=lambda: self.launch_editor() ) self.log_edit_btn.pack(side='right', padx=(0, 0), pady=0) @@ -1266,15 +1266,15 @@ def _on_text_label_click(self, event): self.log_progress_bar = ctk.CTkProgressBar(self.log_progress_frame, mode='determinate', fg_color="gray17") self.log_progress_bar.set(0) - + self.hyperlink = HyperlinkManager(self.log_textbox._textbox) # Queue table self.queue_frame = ctk.CTkFrame(self.tab_queue, fg_color='transparent', border_width=1, corner_radius=0) - self.queue_frame.pack(padx=0, pady=0, expand=True, fill='both') + self.queue_frame.pack(padx=0, pady=0, expand=True, fill='both') self.queue_frame = ctk.CTkFrame(self.queue_frame, fg_color='transparent') self.queue_frame.pack(padx=5, pady=5, fill='both', expand=True) - + # Scrollable frame for queue entries self.queue_scrollable = ctk.CTkScrollableFrame(self.queue_frame, bg_color='transparent', fg_color='transparent') self.queue_scrollable.pack(fill='both', expand=True, padx=0, pady=(0, 0)) @@ -1287,7 +1287,7 @@ def _on_text_label_click(self, event): self.queue_controls_frame, text=t('editor_button'), width=100, - fg_color=self.log_textbox._scrollbar_button_color, + fg_color=self.log_textbox._scrollbar_button_color, command=lambda: self.launch_editor() ) self.queue_edit_btn.pack(side='right', padx=(0, 5), pady=5) @@ -1316,12 +1316,12 @@ def _on_text_label_click(self, event): self.update_queue_table() self.update_scrollbar_visibility() - + self.logn(t('welcome_message'), 'highlight') self.log(t('welcome_credits', v=app_version, y=app_year)) self.logn('https://github.com/kaixxx/noScribe', link='https://github.com/kaixxx/noScribe#readme') self.logn(t('welcome_instructions')) - + # check for new releases if get_config('check_for_update', 'True') == 'True': try: @@ -1340,29 +1340,29 @@ def _on_text_label_click(self, event): self.logn() except: pass - + # Events and Methods def on_whisper_model_selected(self, value): print(self.option_menu_whisper_model.old_value) print(value) - + def on_resize(self, event): self.update_scrollbar_visibility() def update_scrollbar_visibility(self): # Get the size of the scroll region and current canvas size - canvas = self.scrollable_options._parent_canvas + canvas = self.scrollable_options._parent_canvas scroll_region_height = canvas.bbox("all")[3] - canvas_height = canvas.winfo_height() - + canvas_height = canvas.winfo_height() + scrollbar = self.scrollable_options._scrollbar if scroll_region_height > canvas_height: scrollbar.grid() else: - scrollbar.grid_remove() # Hide the scrollbar if not needed - + scrollbar.grid_remove() # Hide the scrollbar if not needed + def update_queue_table(self): """Update the queue table by diffing: update existing rows, add new ones, remove missing.""" if getattr(self, '_headless', False): @@ -1406,7 +1406,7 @@ def update_queue_table(self): pass status_text = t(str(job.status.value)) - + btn_color = ctk.ThemeManager.theme['CTkScrollbar']['button_color'] if hasattr(self, 'queue_row_widgets') and job_key in self.queue_row_widgets: @@ -1415,7 +1415,7 @@ def update_queue_table(self): # Update text directly on the JobEntryFrame canvas row['frame'].set_name_text(audio_name) row['frame'].set_status_text(status_text, status_color) - + # Update progress bar visibility based on job status is_processing = job.status in [JobStatus.AUDIO_CONVERSION, JobStatus.SPEAKER_IDENTIFICATION, JobStatus.TRANSCRIPTION] if is_processing: @@ -1438,7 +1438,7 @@ def update_queue_table(self): ) repeat_btn.pack(side='right', padx=(0, 4), pady=5) row['repeat_btn'] = repeat_btn - row['repeat_tt'] = CTkToolTip(repeat_btn, text=t('queue_tt_repeat_job')) + row['repeat_tt'] = CTkToolTip(repeat_btn, text=t('queue_tt_repeat_job')) else: if not row['repeat_btn'].winfo_ismapped(): row['repeat_btn'].pack(side='right', padx=(0, 4), pady=2) @@ -1493,7 +1493,7 @@ def update_queue_table(self): ) partial_btn.pack(side='right', padx=(0, 4), pady=5) row['partial_btn'] = partial_btn - row['partial_tt'] = CTkToolTip(partial_btn, text=t('queue_tt_open_partial_job')) + row['partial_tt'] = CTkToolTip(partial_btn, text=t('queue_tt_open_partial_job')) else: if not row['partial_btn'].winfo_ismapped(): row['partial_btn'].pack(side='right', padx=(0, 4), pady=2) @@ -1520,7 +1520,7 @@ def update_queue_table(self): ) edit_btn.pack(side='right', padx=(0, 4), pady=5) row['edit_btn'] = edit_btn - row['edit_tt'] = CTkToolTip(edit_btn, text=t('queue_tt_edit_job')) + row['edit_tt'] = CTkToolTip(edit_btn, text=t('queue_tt_edit_job')) else: if not row['edit_btn'].winfo_ismapped(): row['edit_btn'].pack(side='right', padx=(0, 4), pady=2) @@ -1544,11 +1544,11 @@ def update_queue_table(self): fg_color = ctk.ThemeManager.theme['CTkSegmentedButton']['unselected_color'][1] entry_frame = JobEntryFrame(self.queue_scrollable, progress=job.progress, progress_color=None, fg_color=fg_color) entry_frame.pack(fill='x', padx=(0, 5), pady=2) - + # Set the text directly on the JobEntryFrame canvas entry_frame.set_name_text(audio_name) entry_frame.set_status_text(status_text, status_color) - + # Set progress bar visibility based on job status is_processing = job.status in [JobStatus.AUDIO_CONVERSION, JobStatus.SPEAKER_IDENTIFICATION, JobStatus.TRANSCRIPTION] if is_processing: @@ -1567,7 +1567,7 @@ def update_queue_table(self): hover_color=('darkred'), command=lambda j=job: self._on_queue_row_action(j) ) - cancel_btn.pack(side='right', padx=(0, 6), pady=5) + cancel_btn.pack(side='right', padx=(0, 6), pady=5) # Tooltip for X button per status if job.status == JobStatus.WAITING: cancel_tt_text = t('queue_tt_remove_waiting') @@ -1576,8 +1576,8 @@ def update_queue_table(self): else: cancel_tt_text = t('queue_tt_remove_entry') cancel_tt = CTkToolTip(cancel_btn, text=cancel_tt_text) - - # Repeat button (job status canceled or error only) + + # Repeat button (job status canceled or error only) repeat_btn = None repeat_tt = None if job.status in [JobStatus.ERROR, JobStatus.CANCELED]: @@ -1623,7 +1623,7 @@ def update_queue_table(self): command=lambda j=job: self._on_queue_row_edit(j) ) edit_btn.pack(side='right', padx=(0, 4), pady=5) - edit_tt = CTkToolTip(edit_btn, text=t('queue_tt_edit_job')) + edit_tt = CTkToolTip(edit_btn, text=t('queue_tt_edit_job')) # Row tooltip (create once per row) tt_frame = CTkToolTip(entry_frame, text=job_tooltip) #, bg_color='gray') @@ -1652,7 +1652,7 @@ def update_queue_table(self): row = self.queue_row_widgets.pop(key) if row['frame'].winfo_exists(): row['frame'].destroy() - + # Update queue tab title new_name = f'{t("tab_queue")} ({len(self.queue.jobs) - len(self.queue.get_waiting_jobs()) - len(self.queue.get_running_jobs())}/{len(self.queue.jobs)})' old_name = self.tabview._name_list[1] @@ -1707,7 +1707,7 @@ def on_queue_stop(self, ask_before_canceling=True) -> bool: Returns False if user does not confirm cancelation.""" try: if (ask_before_canceling and - (self.queue.is_running() or self.queue.has_pending_jobs()) and + (self.queue.is_running() or self.queue.has_pending_jobs()) and not tk.messagebox.askyesno(title='noScribe', message=t('queue_cancel_all_confirm'))): return False # Mark waiting jobs as canceled immediately @@ -1814,7 +1814,7 @@ def _on_queue_row_repeat(self, job: TranscriptionJob): wkr.start() except Exception as e: self.logn(f'Queue repeat error: {e}', 'error') - + def _on_queue_row_edit(self, job: TranscriptionJob): self.openLink(f'file://{job.transcript_file}') @@ -1842,15 +1842,15 @@ def _on_queue_row_open_partial(self, job: TranscriptionJob): def launch_editor(self, file=''): # Launch the editor in a separate process so that in can stay running even if noScribe quits. - # Source: https://stackoverflow.com/questions/13243807/popen-waiting-for-child-process-even-when-the-immediate-child-has-terminated/13256908#13256908 + # Source: https://stackoverflow.com/questions/13243807/popen-waiting-for-child-process-even-when-the-immediate-child-has-terminated/13256908#13256908 # set system/version dependent "start_new_session" analogs - + if file == '': # get last finished job (if any) jobs = self.queue.get_finished_jobs() if len(jobs) > 0: file = jobs[-1].transcript_file - + if file == '': # no file or finished job to open if not tk.messagebox.askyesno(title='noScribe', message=t('err_editor_no_file')): @@ -1881,8 +1881,8 @@ def launch_editor(self, file=''): # from msdn [1] CREATE_NEW_PROCESS_GROUP = 0x00000200 # note: could get it from subprocess DETACHED_PROCESS = 0x00000008 # 0x8 | 0x200 == 0x208 - kwargs.update(creationflags=DETACHED_PROCESS | CREATE_NEW_PROCESS_GROUP) - else: # should work on all POSIX systems, Linux and macOS + kwargs.update(creationflags=DETACHED_PROCESS | CREATE_NEW_PROCESS_GROUP) + else: # should work on all POSIX systems, Linux and macOS kwargs.update(start_new_session=True) if program is not None and os.path.exists(program): @@ -1898,33 +1898,33 @@ def launch_editor(self, file=''): def openLink(self, link: str) -> None: if link.startswith('file://') and link.endswith('.html'): self.launch_editor(link[7:]) - else: + else: webbrowser.open(link) - + def log(self, txt: str = '', tags: list = [], where: str = 'both', link: str = '', tb: str = '') -> None: - """ Log to main window (where can be 'screen', 'file', or 'both') + """ Log to main window (where can be 'screen', 'file', or 'both') tb = formatted traceback of the error, only logged to file """ - + # Handle screen logging if requested and textbox exists - if where != 'file': + if where != 'file': if txt[:-1] != t('welcome_instructions'): - print(txt, end='') + print(txt, end='') if not getattr(self, '_headless', False) and hasattr(self, 'log_textbox') and self.log_textbox.winfo_exists(): try: self.log_textbox.configure(state=tk.NORMAL) # To prevent slowing down the UI, limit the content of log_textbox to max 5000 characters if self.log_len > 5000: self.log_textbox.delete("1.0", f"1.0 + {self.log_len - 3000} chars") # keep the last 3000 - self.log_len = 3000 - + self.log_len = 3000 + if link: tags = tags + self.hyperlink.add(partial(self.openLink, link)) - + self.log_textbox.insert(tk.END, txt, tags) self.log_textbox.yview_moveto(1) # Scroll to last line self.log_len += len(txt) - + # Schedule disabling the textbox in the main thread self.log_textbox.after(0, lambda: self.log_textbox.configure(state=tk.DISABLED)) except Exception as e: @@ -1938,7 +1938,7 @@ def log(self, txt: str = '', tags: list = [], where: str = 'both', link: str = ' if tags == 'error': txt = f'ERROR: {txt}' if tb != '': - txt = f'{txt}\nTraceback:\n{tb}' + txt = f'{txt}\nTraceback:\n{tb}' self.log_file.write(txt) self.log_file.flush() except Exception as e: @@ -1991,8 +1991,8 @@ def create_default_transcript_names(self, dir=None): self.logn(log_msg) def button_audio_file_event(self): - fn = tk.filedialog.askopenfilename(initialdir=os.path.dirname(self.audio_files_list[0] if len(self.audio_files_list) > 0 else ''), - initialfile=" ".join(f'"{os.path.basename(path)}"' for path in self.audio_files_list), + fn = tk.filedialog.askopenfilename(initialdir=os.path.dirname(self.audio_files_list[0] if len(self.audio_files_list) > 0 else ''), + initialfile=" ".join(f'"{os.path.basename(path)}"' for path in self.audio_files_list), multiple=True) if fn and len(fn) > 0: self.audio_files_list = fn @@ -2011,17 +2011,17 @@ def button_transcript_file_event(self): if len(self.audio_files_list) == 0: # select audio first tk.messagebox.showerror(title='noScribe', message=t('err_no_audio_file')) - return + return if len(self.transcript_files_list) > 0: _initialdir = os.path.dirname(self.transcript_files_list[0]) _initialfile = os.path.basename(self.transcript_files_list[0]) else: _initialdir = '' - _initialfile = '' + _initialfile = '' if not ('last_filetype' in config): config['last_filetype'] = 'html' filetypes = [ - ('noScribe Transcript','*.html'), + ('noScribe Transcript','*.html'), ('Text only','*.txt'), ('WebVTT Subtitles (also for EXMARaLDA)', '*.vtt') ] @@ -2029,7 +2029,7 @@ def button_transcript_file_event(self): if ft[1] == f'*.{config["last_filetype"]}': filetypes.insert(0, filetypes.pop(i)) break - + if len(self.audio_files_list) > 1: # multiple audio files, select an output directory tk.messagebox.showinfo(title='noScribe', message=t('output_dir_selection')) @@ -2040,26 +2040,26 @@ def button_transcript_file_event(self): return else: # single audio file, select an output file name - fn = tk.filedialog.asksaveasfilename(initialdir=_initialdir, initialfile=_initialfile, - filetypes=filetypes, + fn = tk.filedialog.asksaveasfilename(initialdir=_initialdir, initialfile=_initialfile, + filetypes=filetypes, defaultextension=config['last_filetype']) if fn: file_ext = os.path.splitext(fn)[1][1:].lower() if not file_ext in ['html', 'txt', 'vtt']: tk.messagebox.showerror(title='noScribe', message=t('err_unsupported_output_format', file_type=file_ext)) - return + return self.transcript_files_list = [fn] self.button_transcript_file_name.configure(text=os.path.basename(fn)) config['last_filetype'] = file_ext else: return - + self.logn() log_msg = t('log_transcript_filename') for fn in self.transcript_files_list: log_msg += f'\n{fn}' self.logn(log_msg) - + def set_progress(self, step, value, speaker_detection='none'): """ Update state of the progress bar """ if getattr(self, '_headless', False): @@ -2085,7 +2085,7 @@ def set_progress(self, step, value, speaker_detection='none'): # stop updating progress bars if the change is less than 1% (0.01) return self.current_progress = progr - + # Update log_progress_bar if self.current_progress > 0: self.log_progress_bar.set(self.current_progress) @@ -2097,14 +2097,14 @@ def set_progress(self, step, value, speaker_detection='none'): if self.log_progress_bar.winfo_ismapped(): self.log_progress_bar.pack_forget() self.log_stop_btn.configure(state=ctk.DISABLED) - + # Update progress of currently running job in queue table if progr >= 0: running_jobs = self.queue.get_running_jobs() if running_jobs: current_job = running_jobs[0] # Get the first running job current_job.progress = progr - + # Update the progress bar background for this job job_key = id(current_job) if hasattr(self, 'queue_row_widgets') and job_key in self.queue_row_widgets: @@ -2113,26 +2113,26 @@ def set_progress(self, step, value, speaker_detection='none'): row['frame'].set_progress(progr) def collect_transcription_options(self) -> TranscriptionQueue: - """Collect all transcription options from UI and config and creates a + """Collect all transcription options from UI and config and creates a TranscriptionQueue for each audio file""" # Validate required inputs if len(self.audio_files_list) == 0: raise ValueError(t('err_no_audio_file')) - + if len(self.transcript_files_list) == 0: raise ValueError(t('err_no_transcript_file')) - + # Parse time range from UI start_time = None val = self.entry_start.get() if val != '': start_time = utils.str_to_ms(val) - + stop_time = None val = self.entry_stop.get() if val != '': stop_time = utils.str_to_ms(val) - + # Get whisper model path sel_whisper_model = self.option_menu_whisper_model.get() if sel_whisper_model not in self.whisper_models: @@ -2140,7 +2140,7 @@ def collect_transcription_options(self) -> TranscriptionQueue: queue = TranscriptionQueue() if len(self.audio_files_list) != len(self.transcript_files_list): self.create_default_transcript_names() - + for i in range(len(self.audio_files_list)): job = create_transcription_job( audio_file=self.audio_files_list[i], @@ -2160,9 +2160,9 @@ def collect_transcription_options(self) -> TranscriptionQueue: if job.file_ext == 'vtt' and (job.pause > 0 or job.overlapping or job.timestamps): self.logn() self.logn(t('err_vtt_invalid_options'), 'error') - + queue.add_job(job) - + return queue def transcription_worker(self, start_job_index=None): @@ -2181,7 +2181,7 @@ def transcription_worker(self, start_job_index=None): if pending > 0: self.logn(t('queue_start_jobs', total=pending)) else: - self.logn(t('queue_none_waiting')) + self.logn(t('queue_none_waiting')) # Process each job in the queue while self.queue.has_pending_jobs(): # If global cancel was requested (via Stop button), cancel all waiting jobs @@ -2190,7 +2190,7 @@ def transcription_worker(self, start_job_index=None): job.set_canceled(t('err_user_cancelation')) self.update_queue_table() break - + # Get next job job = None if start_job_index and start_job_index < len(self.queue.jobs): @@ -2201,19 +2201,19 @@ def transcription_worker(self, start_job_index=None): job = self.queue.get_next_waiting_job() if not job: break - + # Process the job try: self.logn() self.logn(t('start_job', audio_file=os.path.basename(job.audio_file)), 'highlight') - + # Process single job self._process_single_job(job) - + queue_jobs_processed += 1 job.set_finished() self.update_queue_table() - + except Exception as e: # Distinguish cancellation from real errors error_msg = job.error_message or str(e) @@ -2231,7 +2231,7 @@ def transcription_worker(self, start_job_index=None): if self._cancel_job_only: self.cancel = False self._cancel_job_only = False - + # Log final summary final_summary = self.queue.get_queue_summary() self.logn() @@ -2240,13 +2240,13 @@ def transcription_worker(self, start_job_index=None): self.logn(t('completed', finished=final_summary['finished'])) self.logn(t('failed', errors=final_summary['errors'])) self.logn(t('canceled_summary', canceled=final_summary['canceled'])) - + # Log total processing time total_time = datetime.datetime.now() - queue_start_time total_seconds = "{:02d}".format(int(total_time.total_seconds() % 60)) total_time_str = f'{int(total_time.total_seconds() // 60)}:{total_seconds}' self.logn(t('processing_time', total_time_str=total_time_str)) - + # open editor if only a single file was processed if not getattr(self, '_headless', False) \ and queue_jobs_processed == 1 \ @@ -2256,14 +2256,14 @@ def transcription_worker(self, start_job_index=None): and get_config('auto_edit_transcript', 'True') == 'True': self.launch_editor(job.transcript_file) elif queue_jobs_processed > 1 and not getattr(self, '_headless', False): - # if more than one job has been processed, switch to queue tab for an overview + # if more than one job has been processed, switch to queue tab for an overview self.tabview.set(self.tabview._name_list[1]) - + except Exception as e: self.logn(f"Queue processing error: {str(e)}", 'error') traceback_str = traceback.format_exc() self.logn(f"Queue error details: {traceback_str}", where='file') - + finally: # Hide progress self.set_progress(0, 0) @@ -2277,7 +2277,7 @@ def _process_single_job(self, job: TranscriptionJob): proc_start_time = datetime.datetime.now() job.set_running() self.update_queue_table() - + tmpdir = TemporaryDirectory('noScribe') tmp_audio_file = os.path.join(tmpdir.name, 'tmp_audio.wav') orig_transcript_file = job.transcript_file @@ -2385,7 +2385,7 @@ def _process_single_job(self, job: TranscriptionJob): def overlap_len(ss_start, ss_end, ts_start, ts_end): # ss...: speaker segment start and end in milliseconds (from pyannote) # ts...: transcript segment start and end (from whisper.cpp) - # returns overlap percentage, i.e., "0.8" = 80% of the transcript segment overlaps with the speaker segment from pyannote + # returns overlap percentage, i.e., "0.8" = 80% of the transcript segment overlaps with the speaker segment from pyannote if ts_end < ss_start: # no overlap, ts is before ss return None @@ -2404,8 +2404,8 @@ def overlap_len(ss_start, ss_end, ts_start, ts_end): return ol_len / ts_len def find_speaker(diarization, transcript_start, transcript_end) -> str: - # Looks for the shortest segment in diarization that has at least 80% overlap - # with transcript_start - trancript_end. + # Looks for the shortest segment in diarization that has at least 80% overlap + # with transcript_start - trancript_end. # Returns the speaker name if found. # If only an overlap < 80% is found, this speaker name ist returned. # If no overlap is found, an empty string is returned. @@ -2429,11 +2429,11 @@ def find_speaker(diarization, transcript_start, transcript_end) -> str: overlap_found = t segment_len = current_segment_len spkr = current_segment_spkr - elif t > overlap_found: # no segment with good overlap yet, take this if the overlap is better then previously found + elif t > overlap_found: # no segment with good overlap yet, take this if the overlap is better then previously found overlap_found = t segment_len = current_segment_len spkr = current_segment_spkr - + if job.overlapping and is_overlapping: return f"//{spkr}" else: @@ -2496,7 +2496,7 @@ def find_speaker(diarization, transcript_start, transcript_end) -> str: # prepare transcript html d = AdvancedHTMLParser.AdvancedHTMLParser() - d.parseStr(default_html) + d.parseStr(default_html) # add audio file path: tag = d.createElement("meta") @@ -2517,7 +2517,7 @@ def find_speaker(diarization, transcript_start, transcript_end) -> str: main_body.addClass('WordSection1') d.body.appendChild(main_body) - # header + # header p = d.createElement('p') p.setStyle('font-weight', '600') p.appendText(Path(job.audio_file).stem) # use the name of the audio file (without extension) as the title @@ -2633,7 +2633,7 @@ def adjust_for_pause(segment): segment.end = original_end return segment - + # Run Faster-Whisper in a spawned subprocess and stream segments last_segment_end = 0 last_timestamp_ms = 0 @@ -2701,8 +2701,8 @@ def __init__(self, d): prev_speaker = speaker speaker = new_speaker seg_text = f' {speaker}:{seg_text}' - seg_html = html.escape(seg_text, quote=False) - elif (speaker[:2] == '//') and (new_speaker == prev_speaker): # was overlapping speech and we are returning to the previous speaker + seg_html = html.escape(seg_text, quote=False) + elif (speaker[:2] == '//') and (new_speaker == prev_speaker): # was overlapping speech and we are returning to the previous speaker speaker = new_speaker seg_text = f'//{seg_text}' seg_html = html.escape(seg_text, quote=False) @@ -2732,7 +2732,7 @@ def __init__(self, d): else: seg_html = html.escape(seg_text, quote=False).lstrip() seg_text = f'{speaker}:{seg_text}' - + else: # same speaker if job.timestamps: if (start - last_timestamp_ms) > job.timestamp_interval: @@ -2760,7 +2760,7 @@ def __init__(self, d): p.appendChild(a) self.log(seg_text) - + first_segment = False # auto save periodically @@ -2776,13 +2776,13 @@ def __init__(self, d): self.set_progress(3, progr, job.speaker_detection) except Exception: pass - + try: info = self._run_whisper_subprocess_stream(tmp_audio_file, job, on_segment) transcription_success = True # if self.cancel: - # raise Exception(t('err_user_cancelation')) - + # raise Exception(t('err_user_cancelation')) + job.has_partial_transcript = False # transcript is finished self.logn() self.logn() @@ -2818,8 +2818,8 @@ def __init__(self, d): # log duration of the whole process proc_time = datetime.datetime.now() - proc_start_time proc_seconds = "{:02d}".format(int(proc_time.total_seconds() % 60)) - proc_time_str = f'{int(proc_time.total_seconds() // 60)}:{proc_seconds}' - self.logn(t('trancription_time', duration=proc_time_str)) + proc_time_str = f'{int(proc_time.total_seconds() // 60)}:{proc_seconds}' + self.logn(t('trancription_time', duration=proc_time_str)) finally: self.log_file.close() self.log_file = None @@ -2827,13 +2827,13 @@ def __init__(self, d): finally: # hide progress self.set_progress(0, 0) - + def create_job(self, enqueue=False): try: show_queue_tab = enqueue # Collect transcription options from UI new_queue = self.collect_transcription_options() - + # Confirm override if output file conflicts with jobs in queue for job in new_queue.jobs: if self.queue.has_output_conflict(job.transcript_file): @@ -2844,7 +2844,7 @@ def create_job(self, enqueue=False): # Add the jobs to the queue for job in new_queue.jobs: - self.queue.add_job(job) + self.queue.add_job(job) if not enqueue and not self.queue.is_running(): # Start transcription worker with the queue wkr = Thread(target=self.transcription_worker, kwargs={"start_job_index": len(self.queue.jobs) - 1}, daemon=True) self._worker_threads.append(wkr) @@ -2854,14 +2854,14 @@ def create_job(self, enqueue=False): show_queue_tab = True self.logn() self.logn(t('queue_added_job', audio_file=os.path.basename(job.audio_file)), 'highlight') - + self.update_queue_table() if show_queue_tab: try: self.tabview.set(self.tabview._name_list[1]) # Switch to queue tab for visual feedback except Exception: pass - + except (ValueError, FileNotFoundError) as e: # Handle validation errors from collect_transcription_options self.logn(str(e), 'error') @@ -3122,7 +3122,7 @@ def _run_diarize_subprocess(self, tmp_audio_file: str, job): self._mp_queue = None return diarization or [] - + def on_closing(self): # (see: https://stackoverflow.com/questions/111155/how-do-i-handle-the-window-close-event-in-tkinter) global force_pyannote_cpu @@ -3286,7 +3286,7 @@ def run_cli_mode(args): try: # Create a headless app instance (no GUI initialization) app = HeadlessApp() - + # Validate and set the whisper model if args.model: if args.model not in app.whisper_models: @@ -3302,18 +3302,18 @@ def run_cli_mode(args): else: print("Error: No Whisper models found.") return 1 - + # Create job from CLI arguments job = create_job_from_cli_args(args) - + # Set the whisper model path job.whisper_model = app.whisper_models[args.model] - + # Validate files if not os.path.exists(job.audio_file): print(f"Error: Audio file '{job.audio_file}' not found.") return 1 - + # Check output directory exists output_dir = os.path.dirname(os.path.abspath(job.transcript_file)) if not os.path.exists(output_dir): @@ -3322,20 +3322,20 @@ def run_cli_mode(args): except Exception as e: print(f"Error: Cannot create output directory '{output_dir}': {e}") return 1 - + # Add the job to the queue app.queue.add_job(job) - + print(f"Starting transcription of '{job.audio_file}'...") print(f"Output will be saved to '{job.transcript_file}'") print(f"Language: {job.language_name}") print(f"Model: {args.model}") print(f"Speaker detection: {job.speaker_detection}") print() - + # Start transcription worker with the queue app.transcription_worker() - + # Check results final_summary = app.queue.get_queue_summary() if final_summary['finished'] > 0: @@ -3351,7 +3351,7 @@ def run_cli_mode(args): if failed_jobs: print(f"Error: {failed_jobs[0].error_message}") return 1 - + except Exception as e: print(f"Error: {str(e)}") return 1 @@ -3366,11 +3366,11 @@ def show_available_models(): # Create headless app instance to get models app = HeadlessApp() models = app.whisper_models.keys() - + print("Available Whisper models:") for model in models: print(f" - {model}") - + if not models: print(" No models found. Please check your installation.") except Exception as e: diff --git a/noScribe/pyannote_mp_worker.py b/noScribe/pyannote_mp_worker.py index 7432c1f2..7fe8883b 100644 --- a/noScribe/pyannote_mp_worker.py +++ b/noScribe/pyannote_mp_worker.py @@ -23,7 +23,7 @@ def pyannote_proc_entrypoint(args: dict, q): try: import torch if platform.system() == "Darwin" and platform.machine() == "x86_64": - torch.set_num_threads(1) + torch.set_num_threads(1) from pyannote.audio import Pipeline def plog(level, msg): @@ -59,7 +59,7 @@ def __call__(self, step_name, step_artifact, file=None, total=None, completed=No raise FileNotFoundError(audio_file) plog("debug", "Subprocess (diarize) started. Initializing PyAnnote pipeline...") - + # determine xpu device = args.get("device", "") if device != 'cpu': @@ -75,7 +75,7 @@ def __call__(self, step_name, step_artifact, file=None, total=None, completed=No with impres.as_file(impres.files("pyannote")) as mypath: pipeline = Pipeline.from_pretrained(mypath) - waveform, sample_rate = torchaudio.load(audio_file) + waveform, sample_rate = torchaudio.load(audio_file) pipeline.to(torch.device(device)) seg_list = [] diff --git a/noScribe/tkHyperlinkManager.py b/noScribe/tkHyperlinkManager.py index c4230cfa..61c23b1a 100644 --- a/noScribe/tkHyperlinkManager.py +++ b/noScribe/tkHyperlinkManager.py @@ -1,6 +1,6 @@ from tkinter import Text, CURRENT # Source: https://web.archive.org/web/20200806215420/http://effbot.org/zone/tkinter-text-hyperlink.htm -# and https://stackoverflow.com/questions/49353034/python-tkinter-have-displayed-text-as-hyperlink +# and https://stackoverflow.com/questions/49353034/python-tkinter-have-displayed-text-as-hyperlink class HyperlinkManager: def __init__(self, text): diff --git a/noScribe/utils.py b/noScribe/utils.py index 6bf62a86..5d7fa0ce 100644 --- a/noScribe/utils.py +++ b/noScribe/utils.py @@ -369,4 +369,4 @@ def get_segments(self): ret += _vtt_escape(item["text"]) ret += "\n\n" - return ret \ No newline at end of file + return ret diff --git a/noScribe/whisper_mp_worker.py b/noScribe/whisper_mp_worker.py index 56195013..5b51e0a8 100644 --- a/noScribe/whisper_mp_worker.py +++ b/noScribe/whisper_mp_worker.py @@ -53,7 +53,7 @@ def plog(level, msg): # Using `t` once here to load the localization files into memory. # As there is no `print`, nothing happens really. t("app_header") - + # determine device device = args.get("device", "") if device != 'cpu': @@ -66,7 +66,7 @@ def plog(level, msg): device = 'cpu' else: raise Exception('Platform not supported yet.') - + # Build model in child using provided options model = WhisperModel( str(args["whisper_model"].path), @@ -102,12 +102,12 @@ def log_cb(level, msg): language_code = args.get("language_code") multilingual = False whisper_lang = None - + if not model.model.is_multilingual and language_code != 'en': language_name = 'English' language_code = 'en' log_cb("info", t('language_en_only')) - + if language_name == "Multilingual": multilingual = True whisper_lang = None @@ -149,9 +149,9 @@ def log_cb(level, msg): vad_filter=args.get("vad_filter", True), vad_parameters=vad_parameters, ) - + log_cb('info', t('start_transcription') + '\n') - + # Stream segments to parent as they arrive for s in segments: try: diff --git a/noScribeEdit/readme.txt b/noScribeEdit/readme.txt index 6e28405d..26925b3e 100644 --- a/noScribeEdit/readme.txt +++ b/noScribeEdit/readme.txt @@ -1 +1 @@ -This folder must contain an compiled version of the noScribe Editor: https://github.com/kaixxx/noScribeEditor \ No newline at end of file +This folder must contain an compiled version of the noScribe Editor: https://github.com/kaixxx/noScribeEditor diff --git a/prompts/prompt.yml b/prompts/prompt.yml index 62ddfb86..e896a0aa 100644 --- a/prompts/prompt.yml +++ b/prompts/prompt.yml @@ -58,4 +58,4 @@ tr: "Şey, bilirsin, bu, aslında, zor." uk: "Ем, знаєш, це, як би, не просто." ur: "ہمم، میں سوچ رہا ہوں کہ یہ ایسا، آسان نہیں ہے۔" vi: "Ờ, biết không, nó không dễ đâu." -cy: "Um, wyddost ti, mae o'n, fel, anodd." \ No newline at end of file +cy: "Um, wyddost ti, mae o'n, fel, anodd." diff --git a/pyannote/README.md b/pyannote/README.md index 8356d663..5096d456 100644 --- a/pyannote/README.md +++ b/pyannote/README.md @@ -18,7 +18,7 @@ extra_gated_fields: Company/university: text Use case: type: select - options: + options: - label: Meeting note taker (automated meeting transcription, action item extraction, and speaker identification in recordings) value: meeting - label: Conversation AI (chatbots, voice assistants, multi-turn dialogue systems with speaker awareness) @@ -62,24 +62,24 @@ The [main improvements brought by `Community-1`](https://www.pyannote.ai/blog/co # download the pipeline from Huggingface from pyannote.audio import Pipeline pipeline = Pipeline.from_pretrained( - "pyannote/speaker-diarization-community-1", + "pyannote/speaker-diarization-community-1", token="{huggingface-token}") # run the pipeline locally on your computer output = pipeline("audio.wav") -# print the predicted speaker diarization +# print the predicted speaker diarization for turn, speaker in output.speaker_diarization: print(f"{speaker} speaks between t={turn.start:.3f}s and t={turn.end:.3f}s") ``` ## Benchmark -Out of the box, `Community-1` is much better than `speaker-diarization-3.1`. +Out of the box, `Community-1` is much better than `speaker-diarization-3.1`. We report [diarization error rates](http://pyannote.github.io/pyannote-metrics/reference.html#diarization) (in %) on large collection of academic benchmarks (fully automatic processing, no forgiveness collar, nor skipping overlapping speech). -| Benchmark (last updated in 2025-09) | `legacy` (3.1)| `community-1` | `precision-2` | +| Benchmark (last updated in 2025-09) | `legacy` (3.1)| `community-1` | `precision-2` | | --------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------ | -------------------------------------------------| ------------------------------------------------ | | [AISHELL-4](https://arxiv.org/abs/2104.03603) | 12.2 | 11.7 | 11.4 | | [AliMeeting](https://www.openslr.org/119/) (channel 1) | 24.5 | 20.3 | 15.2 | diff --git a/pyannote/config.yaml b/pyannote/config.yaml index 4022db43..ecd8dff2 100644 --- a/pyannote/config.yaml +++ b/pyannote/config.yaml @@ -1,4 +1,4 @@ -dependencies: +dependencies: pyannote.audio: 4.0.0 pipeline: @@ -11,7 +11,7 @@ pipeline: embedding_batch_size: 32 embedding_exclude_overlap: true plda: $model/plda - + params: clustering: threshold: 0.6 diff --git a/pyannote/plda/README.md b/pyannote/plda/README.md index 5c01ad2e..f876bdc0 100644 --- a/pyannote/plda/README.md +++ b/pyannote/plda/README.md @@ -1,3 +1,3 @@ PLDA model trained by [BUT Speech@FIT](https://speech.fit.vut.cz/) group. -Thanks to [Jiangyu Han](https://github.com/jyhan03) and [Petr Pálka](https://github.com/Selesnyan) for the integration of VBx in pyannote.audio. \ No newline at end of file +Thanks to [Jiangyu Han](https://github.com/jyhan03) and [Petr Pálka](https://github.com/Selesnyan) for the integration of VBx in pyannote.audio. diff --git a/pyinstaller/noScribe_win.spec b/pyinstaller/noScribe_win.spec index d15f006a..3b69f992 100644 --- a/pyinstaller/noScribe_win.spec +++ b/pyinstaller/noScribe_win.spec @@ -12,20 +12,20 @@ project_root = os.path.abspath(os.path.join(SPECPATH, '..')) # noScribe: -noScribe_datas = [] +noScribe_datas = [] noScribe_binaries = [] noScribe_hiddenimports = [] noScribe_datas += [ -('../models/precise/', './models/precise/'), -('../models/fast/', './models/fast/'), -('../noScribeEdit/', './noScribeEdit/'), -('../trans/', './trans/'), -('../img/graphic_sw.png', 'img/'), -('../LICENSE.txt', '.'), -('../img/noScribeLogo.ico', 'img/'), +('../models/precise/', './models/precise/'), +('../models/fast/', './models/fast/'), +('../noScribeEdit/', './noScribeEdit/'), +('../trans/', './trans/'), +('../img/graphic_sw.png', 'img/'), +('../LICENSE.txt', '.'), +('../img/noScribeLogo.ico', 'img/'), ('../prompts/prompt.yml', 'prompts/'), -('../prompts/prompt_nd.yml', 'prompts/'), +('../prompts/prompt_nd.yml', 'prompts/'), ('../README.md', '.')] noScribe_datas += collect_data_files('customtkinter') noScribe_datas += copy_metadata('AdvancedHTMLParser') diff --git a/pyinstaller/nsis_template.txt b/pyinstaller/nsis_template.txt index 1124cb6d..1f9cdb9a 100644 --- a/pyinstaller/nsis_template.txt +++ b/pyinstaller/nsis_template.txt @@ -1,6 +1,6 @@ ############################################################################################ # Template NSIS Installation Script for noScribe -# for NullSoft Scriptable Installation System +# for NullSoft Scriptable Installation System ############################################################################################ !define APP_NAME "noScribe" @@ -87,7 +87,7 @@ Section -MainProgram ; Read uninstall string from registry ReadRegStr $0 ${REG_ROOT} "${UNINSTALL_PATH}" "UninstallString" -StrCmp $0 "" uninstallUser +StrCmp $0 "" uninstallUser ; Check if the installation is silent ${If} ${Silent} @@ -113,7 +113,7 @@ uninstallUser: ReadRegStr $0 "HKCU" "${UNINSTALL_PATH}" "UninstallString" -StrCmp $0 "" continueInstallation +StrCmp $0 "" continueInstallation ; Check if the installation is silent ${If} ${Silent} @@ -155,7 +155,7 @@ WriteUninstaller "$INSTDIR\uninstall.exe" CreateDirectory "$SMPROGRAMS\$SM_Folder" CreateShortCut "$SMPROGRAMS\$SM_Folder\${APP_NAME}.lnk" "$INSTDIR\${MAIN_APP_EXE}" CreateShortCut "$SMPROGRAMS\$SM_Folder\Uninstall ${APP_NAME}.lnk" "$INSTDIR\uninstall.exe" -CreateShortCut "$SMPROGRAMS\$SM_Folder\noScribe Editor.lnk" "$INSTDIR\_internal\noScribeEdit\noScribeEdit.exe" +CreateShortCut "$SMPROGRAMS\$SM_Folder\noScribe Editor.lnk" "$INSTDIR\_internal\noScribeEdit\noScribeEdit.exe" !ifdef WEB_SITE WriteIniStr "$INSTDIR\${APP_NAME} website.url" "InternetShortcut" "URL" "${WEB_SITE}" @@ -195,7 +195,7 @@ ${INSTALL_TYPE} ############################ FILE/DIR LIST UNINSTALL ###################################### #*uninstall_entries*# - + Delete "$INSTDIR\uninstall.exe" !ifdef WEB_SITE Delete "$INSTDIR\${APP_NAME} website.url" diff --git a/pyinstaller/win_build.py b/pyinstaller/win_build.py index 7483d29c..4e6c66a8 100644 --- a/pyinstaller/win_build.py +++ b/pyinstaller/win_build.py @@ -27,7 +27,7 @@ def get_pyinstaller_out_path(cuda=False): return os.path.join(script_dir, 'dist', 'noScribe_noncuda') def run_pyinstaller(cuda=False): - global final_report + global final_report print('##############################################################') print('PyInstaller cuda' if cuda else 'PyInstaller non cuda') @@ -36,11 +36,11 @@ def run_pyinstaller(cuda=False): pyinstaller_cmd = f'conda activate {conda_env_cuda} &&' else: pyinstaller_cmd = f'conda activate {conda_env_noncuda} &&' - + pyinstaller_cmd += f'python "{pyinstaller_path}" --noconfirm "{os.path.join(script_dir, 'noScribe_win.spec')}" --distpath {pyinstaller_out_path}' if clean_build: pyinstaller_cmd += ' --clean' - + print(pyinstaller_cmd) proc = Popen(pyinstaller_cmd, shell=True, cwd=script_dir) proc.communicate() @@ -64,13 +64,13 @@ def format_version(version_string): segments = version_string.split('.') # Calculate how many additional "0" segments need to be appended missing_segments = target_length - len(segments) - + if missing_segments > 0: # Append "0" for each missing segment segments.extend(['0'] * missing_segments) return '.'.join(segments) - + global final_report pyinstaller_out_path = get_pyinstaller_out_path(cuda) installer_name = 'noScribe_setup_' + noScribe_version.replace('.', '_') @@ -78,10 +78,10 @@ def format_version(version_string): installer_name += '_cuda' installer_name += '.exe' installer_name = os.path.join(script_dir, 'win_installer', installer_name) - + print('##############################################################') print('NISIS cuda' if cuda else 'NSIS non cuda') - + # prepare template with open(os.path.join(script_dir, 'nsis_template.txt'), 'r', encoding="utf-8") as nsis_templ_file: nsis_templ = nsis_templ_file.read() @@ -90,7 +90,7 @@ def format_version(version_string): # Recursively generate NSIS commands for installation and uninstallation # of directories and files from the specified directory. - + base_directory = os.path.join(pyinstaller_out_path, 'noScribe') install_entries = '' # "Section \"Install\"\n" @@ -110,7 +110,7 @@ def format_version(version_string): if relative_path: directories_created.append(relative_path.replace(os.sep, "\\")) - + for filename in files: # Generate File command for each file filepath = os.path.join(root, filename).replace(os.sep, "\\") @@ -136,7 +136,7 @@ def format_version(version_string): nsis_cmd = '"' + nsis_path + '" /V4 "' + os.path.join(script_dir, 'nsis_tmp.nsi') + '"' proc = Popen(nsis_cmd, shell=True, cwd=os.path.join(script_dir, 'win_installer')) - proc.communicate() + proc.communicate() if proc.returncode != 0: final_report += 'NSIS commpiler failed.\n' final_report += 'Cmd: ' + nsis_cmd @@ -157,4 +157,4 @@ def format_version(version_string): if run_nsis_cuda: run_nsis(cuda=True) -print(final_report) \ No newline at end of file +print(final_report) diff --git a/tests/audio/test_convert.py b/tests/audio/test_convert.py index 434d7066..b366efef 100644 --- a/tests/audio/test_convert.py +++ b/tests/audio/test_convert.py @@ -114,4 +114,4 @@ def test_to_wav_start_stop_args(tmp_path): assert stream.format.container_name == "s16le" assert stream.channels == 1 # File is 1min long. - assert stream.duration * stream.time_base == pytest.approx(1 * 60, rel=1e-2) \ No newline at end of file + assert stream.duration * stream.time_base == pytest.approx(1 * 60, rel=1e-2) diff --git a/trans/noScribe.de.yml b/trans/noScribe.de.yml index 11a7b8b2..2a31f2c3 100644 --- a/trans/noScribe.de.yml +++ b/trans/noScribe.de.yml @@ -2,16 +2,16 @@ de: app_header: KI-basierte Audio-Transkription welcome_message: Hallo, noScribe ist bereit! welcome_credits: > - + Version %{v}, Kai Dröge %{y} - + portiert nach macOS von Philipp Schneider - + powered by Whisper (OpenAI), faster-whisper (Guillaume Klein) & pyannote (Hervé Bredin) welcome_instructions: > Bitte wähle links eine Audiodatei und den Dateinamen, unter dem das fertige Transkript gespeichert werden - soll. Hab Geduld, die Transkription braucht üblicherweise 2 bis 5 Mal so lang wie die Audiodatei ist. + soll. Hab Geduld, die Transkription braucht üblicherweise 2 bis 5 Mal so lang wie die Audiodatei ist. Bevor man die gesamte Datei transkribiert, sollte man die Einstellungen an einem kurzen Ausschnitt testen. (Start = 00.00.00 and Stop = 00.03.00 bspw. transkribiert nur die ersten drei Minuten.) @@ -28,13 +28,13 @@ de: label_audio_file: 'Audiodatei:' label_audio_file_name: multiple_audio_files: '(mehrere Dateien ausgewählt)' - + label_transcript_file: 'Transkript speichern unter:' - label_transcript_file_name: + label_transcript_file_name: output_dir_selection: 'Mehrere Audiodateien: Bitte einen Ordner wählen, in dem die Transkripte gespeichert werden. Die Dateinamen werden automatisch erzeugt.' label_start: 'Start (hh:mm:ss): ' - label_stop: 'Stop (hh:mm:ss): ' + label_stop: 'Stop (hh:mm:ss): ' label_language: 'Sprache:' label_speaker: 'Sprecher:in erkennen:' @@ -57,7 +57,7 @@ de: audio_conversion: 'Audiokonvertierung' speaker_identification: 'Sprecher:innenidentifikation' transcription: 'Transkription' - canceling: 'Abbruch läuft' + canceling: 'Abbruch läuft' canceled: 'Abgebrochen' finished: 'Abgeschlossen' error: 'Fehler' @@ -113,7 +113,7 @@ de: err_no_transcript_file: 'Fehler: Bitte einen Dateinamen für das fertige Transkript wählen.' err_ffmpeg: 'ffmpeg hat einen Fehler gemeldet.' err_converting_audio: Fehler im 1. Schritt - Audiokonvertierung. - err_identifying_speakers: Fehler im 2. Schritt - Erkennung der Sprecher:innen. + err_identifying_speakers: Fehler im 2. Schritt - Erkennung der Sprecher:innen. err_loading_prompt: 'Warnung: Whisper-Prompt konnte nicht geladen werden.' transcription_canceled: 'Transkription wirklich abbrechen?' queue_cancel_all_confirm: 'Dadurch werden alle unerledigten Aufträge in der Warteschlange abgebrochen. Möchtest du fortfahren?' @@ -148,7 +148,7 @@ de: doc_header_audio: 'Audiodatei: %{file}' pause_minutes: '(%{minutes} Minute[n] Pause)' pause_seconds: '(%{seconds} Sekunden Pause)' - + # Queue actions queue_remove_waiting: 'Diesen Auftrag aus der Warteschlange entfernen?' queue_remove_entry: 'Diesen Eintrag aus der Liste entfernen?' diff --git a/trans/noScribe.en.yml b/trans/noScribe.en.yml index 6d7cea6c..1e2f1114 100644 --- a/trans/noScribe.en.yml +++ b/trans/noScribe.en.yml @@ -2,18 +2,18 @@ en: app_header: AI-powered Audio Transcription welcome_message: Hi there, noScribe is ready! welcome_credits: > - + Version %{v}, Kai Dröge %{y} - + ported to macOS by Philipp Schneider - + powered by Whisper (OpenAI), faster-whisper (Guillaume Klein) & pyannote (Hervé Bredin) welcome_instructions: > Use the buttons on the left to select an audio file and a filename for your finished transcript. - Please be patient. The transcription may take 2 to 5 times as long as your audio file. Test your settings - with a shorter section before transcribing the full audio. (Start = 00.00.00 and Stop = 00.03.00 will - transcribe the first 3 minutes.) + Please be patient. The transcription may take 2 to 5 times as long as your audio file. Test your settings + with a shorter section before transcribing the full audio. (Start = 00.00.00 and Stop = 00.03.00 will + transcribe the first 3 minutes.) Enjoy! @@ -29,13 +29,13 @@ en: label_audio_file: 'Audio file:' label_audio_file_name: - output_dir_selection: 'Multiple audio files: Select a directory where the transcripts should be saved. The names will be generated automatically.' + label_transcript_file_name: