nextcloud · ChristophWurst · Mar 25, 2026 · Mar 25, 2026 · Mar 25, 2026 · Mar 25, 2026
@@ -11,11 +11,40 @@
 
 use Horde_Mime_Part;
 use OCA\Mail\Exception\ServiceException;
+use ValueError;
 use function in_array;
 use function is_string;
 
 class Converter {
 
+	/**
+	 * Map of unsupported charset names to their mbstring equivalents.
+	 * Keys must be lowercase for case-insensitive lookup.
+	 *
+	 * @see http://lists.w3.org/Archives/Public/ietf-charsets/2001AprJun/0030.html
+	 */
+	private const CHARSET_MAP = [
+		'ks_c_5601-1987' => 'UHC',
+		'ks_c_5601-1989' => 'UHC',
+	];
+
+	/**
+	 * Normalize charset names for mbstring compatibility.
+	 *
+	 * Maps unsupported charset names to their mbstring equivalents.
+	 * Notably, handles Korean encodings used by Outlook:
+	 * - ks_c_5601-1987 and ks_c_5601-1989 are mapped to UHC (Windows-949/CP949)
+	 *
+	 * Charset tokens are case-insensitive in email headers (RFC 2045),
+	 * so we normalize to lowercase for lookup.
+	 */
+	private function normalizeCharset(string $charset): string {
+		$charset = trim($charset);
+		$lowerCharset = strtolower($charset);
+
+		return self::CHARSET_MAP[$lowerCharset] ?? $charset;
+	}
+
 	/**
 	 * @param Horde_Mime_Part $p
 	 * @return string
@@ -37,10 +66,17 @@
 
 		// The part specifies a charset
 		if ($charset !== null) {
-			if (in_array($charset, mb_list_encodings(), true)) {
-				$converted = mb_convert_encoding($data, 'UTF-8', $charset);
-			} else {
-				$converted = iconv($charset, 'UTF-8', $data);
+			$normalizedCharset = $this->normalizeCharset($charset);
+			try {
+				if (in_array($normalizedCharset, mb_list_encodings(), true)) {
+					$converted = mb_convert_encoding($data, 'UTF-8', $normalizedCharset);
+				} else {
+					$converted = @iconv($normalizedCharset, 'UTF-8', $data);
+				}
+			} catch (ValueError) {
+				// Invalid charset name, treat as null to use auto-detection below
+				$charset = null;
+				$converted = null;
 			}
 
 			if (is_string($converted)) {
@@ -51,22 +87,42 @@
 		// No charset specified, let's ask mb if this could be UTF-8
 		$detectedCharset = mb_detect_encoding($data, 'UTF-8', true);
 		if ($detectedCharset === false) {
-			// Fallback, non UTF-8
-			$detectedCharset = mb_detect_encoding($data, null, true);
+			// Fallback, try common charsets (the default mb_detect_encoding order may miss some)
+			$detectedCharset = mb_detect_encoding($data, 'ISO-8859-1,ISO-8859-2,UTF-8,ASCII', true);
 		}
 		// Still UTF8, no need to convert
 		if ($detectedCharset !== false && strtoupper($detectedCharset) === 'UTF-8') {
 			return $data;
 		}
 
-		$converted = @mb_convert_encoding($data, 'UTF-8', $charset);
+		// Use detected charset when available, otherwise use original/normalized charset
+		if ($detectedCharset !== false) {
+			$sourceCharset = $detectedCharset;
+		} elseif ($charset !== null) {
+			$sourceCharset = $this->normalizeCharset($charset);
+		} else {
+			$sourceCharset = null;
+		}
+
+		// Attempt conversion with the source charset
+		try {
+			$converted = @mb_convert_encoding($data, 'UTF-8', $sourceCharset);
+		} catch (ValueError) {
+			$converted = false;
+		}
+
 		if ($converted === false) {
 			// Might be a charset that PHP mb doesn't know how to handle, fall back to iconv
-			$converted = iconv($charset, 'UTF-8', $data);
+			try {
+				$converted = @iconv($sourceCharset, 'UTF-8', $data);
+			} catch (ValueError) {
+				// Invalid charset, conversion not possible
+				$converted = null;
+			}
 		}
 
 		if (!is_string($converted)) {
-			throw new ServiceException('Could not detect message charset');
+			throw new ServiceException('Could not convert message charset');
 		}
 		return $converted;
 	}

@@ -60,13 +60,28 @@ public function dataProviderMimeParts(): array {
 		$iso2022jpMimePart->setType('text/plain');
 		$iso2022jpMimePart->setCharset('ISO-2022-JP');
 		$iso2022jpMimePart->setContents(mb_convert_encoding('外せ園査リツハワ題', 'ISO-2022-JP', 'UTF-8'));
-		$iso2022jpMimePart_noCharset = new Horde_Mime_Part();
-		$iso2022jpMimePart_noCharset->setContents('外せ園査リツハワ題');
-		// Korean - not in mb nor iconv
-		// $iso106461MimePart = new Horde_Mime_Part();
-		// $iso106461MimePart->setType('text/plain');
-		// $iso106461MimePart->setCharset('ISO 10646-1');
-		//$iso106461MimePart->setContents(iconv('UTF-8', 'ISO 10646-1', '언론·출판은 타인의 명'));
+		// Korean (Outlook) - ks_c_5601-1987 is mapped to UHC (CP949)
+		// Use iconv for encoding to avoid dependency on mbstring's UHC support
+		$koreanKsc56011987MimePart = new Horde_Mime_Part();
+		$koreanKsc56011987MimePart->setType('text/plain');
+		$koreanKsc56011987MimePart->setCharset('ks_c_5601-1987');
+		$koreanText = '안녕하세요';
+		$koreanKsc56011987MimePart->setContents(iconv('UTF-8', 'CP949', $koreanText));
+		// Korean (Outlook) - ks_c_5601-1989 is also mapped to UHC (CP949)
+		$koreanKsc56011989MimePart = new Horde_Mime_Part();
+		$koreanKsc56011989MimePart->setType('text/plain');
+		$koreanKsc56011989MimePart->setCharset('ks_c_5601-1989');
+		$koreanKsc56011989MimePart->setContents(iconv('UTF-8', 'CP949', $koreanText));
+		// Korean (Outlook) - uppercase variant KS_C_5601-1987 (case-insensitive)
+		$koreanKsc56011987UpperMimePart = new Horde_Mime_Part();
+		$koreanKsc56011987UpperMimePart->setType('text/plain');
+		$koreanKsc56011987UpperMimePart->setCharset('KS_C_5601-1987');
+		$koreanKsc56011987UpperMimePart->setContents(iconv('UTF-8', 'CP949', $koreanText));
+		// Korean (Outlook) - mixed case variant Ks_C_5601-1987 (case-insensitive)
+		$koreanKsc56011987MixedMimePart = new Horde_Mime_Part();
+		$koreanKsc56011987MixedMimePart->setType('text/plain');
+		$koreanKsc56011987MixedMimePart->setCharset('Ks_C_5601-1987');
+		$koreanKsc56011987MixedMimePart->setContents(iconv('UTF-8', 'CP949', $koreanText));
 		// Arabic - not in mb
 		$windowsMimePart = new Horde_Mime_Part();
 		$windowsMimePart->setType('text/plain');
@@ -79,8 +94,63 @@ public function dataProviderMimeParts(): array {
 			[$iso88591MimePart, 'Ümlaut'],
 			[$iso2022jpMimePart, '外せ園査リツハワ題'],
 			[$iso88591MimePart_noCharset, 'בה בדף לחבר ממונרכיה, בקר בגרסה ואמנות דת'],
-			// [$iso106461MimePart, '언론·출판은 타인의 명'],
+			[$koreanKsc56011987MimePart, $koreanText],
+			[$koreanKsc56011989MimePart, $koreanText],
+			[$koreanKsc56011987UpperMimePart, $koreanText],
+			[$koreanKsc56011987MixedMimePart, $koreanText],
 			[$windowsMimePart, 'قام زهاء أوراقهم ما,']
 		];
 	}
+
+	/**
+	 * Test that conversion succeeds when no charset is specified in the MIME header.
+	 *
+	 * This tests the code path where $charset is null. The Converter should:
+	 * 1. Use mb_detect_encoding() to detect the source encoding
+	 * 2. Use the detected charset for conversion to UTF-8
+	 *
+	 * Without detection, conversion would fail or produce garbled output.
+	 */
+	public function testConvertWithNullCharsetFallback(): void {
+		// Create a mock that returns null for getCharset() to test the null charset path
+		$mimePart = $this->createMock(Horde_Mime_Part::class);
+		$mimePart->method('getContents')
+			->willReturn(mb_convert_encoding('Tëst', 'ISO-8859-1', 'UTF-8'));
+		$mimePart->method('getCharset')
+			->willReturn(null);
+
+		// Should complete without ValueError and return the correctly converted text
+		$result = $this->converter->convert($mimePart);
+
+		// Verify actual conversion correctness, not just UTF-8 validity
+		$this->assertEquals('Tëst', $result);
+		// Also verify it's valid UTF-8
+		$this->assertTrue(mb_check_encoding($result, 'UTF-8'));
+	}
+
+	/**
+	 * Test that an invalid/unknown charset name does not let ValueError bubble up.
+	 *
+	 * When an invalid charset is provided, Converter catches the ValueError
+	 * and falls back to mbstring auto-detection. The result depends on
+	 * mb_detect_order, but the important behavior is that no ValueError escapes.
+	 */
+	public function testConvertWithInvalidCharsetDoesNotThrowValueError(): void {
+		$mimePart = $this->createMock(Horde_Mime_Part::class);
+		$mimePart->method('getContents')
+			->willReturn(mb_convert_encoding('Tëst with spëcial chärs', 'ISO-8859-1', 'UTF-8'));
+		$mimePart->method('getCharset')
+			->willReturn('INVALID-CHARSET-NAME-12345');
+
+		$thrown = null;
+		try {
+			$this->converter->convert($mimePart);
+		} catch (\ValueError $e) {
+			$thrown = $e;
+		} catch (\OCA\Mail\Exception\ServiceException) {
+			// ServiceException is acceptable (auto-detection failed)
+		}
+
+		$this->assertNull($thrown, 'ValueError should not bubble up from convert()');
+	}
 }