diff --git a/lib/ruby_git.rb b/lib/ruby_git.rb index 3d80976..9c6e0e6 100644 --- a/lib/ruby_git.rb +++ b/lib/ruby_git.rb @@ -1,7 +1,6 @@ # frozen_string_literal: true require_relative 'ruby_git/command_line' -require_relative 'ruby_git/encoding_normalizer' require_relative 'ruby_git/errors' require_relative 'ruby_git/option_validators' require_relative 'ruby_git/repository' diff --git a/lib/ruby_git/command_line/encoding_normalizer.rb b/lib/ruby_git/command_line/encoding_normalizer.rb new file mode 100644 index 0000000..d2a7f12 --- /dev/null +++ b/lib/ruby_git/command_line/encoding_normalizer.rb @@ -0,0 +1,51 @@ +# frozen_string_literal: true + +require 'rchardet' + +module RubyGit + module CommandLine + # Utility to normalize string encoding + # @api public + module EncodingNormalizer + # Detects the character encoding used to create a string or binary data + # + # Detects the encoding of a string or returns binary if it cannot be detected + # + # @example + # EncodingNormalizer.detect_encoding("Hello, world!") #=> "ascii" + # EncodingNormalizer.detect_encoding("\xCB\xEF\xF1\xE5\xEC") #=> "ISO-8859-7" + # EncodingNormalizer.detect_encoding("\xC0\xCC\xB0\xCD\xC0\xBA") #=> "EUC-KR" + # + # @param str [String] the string to detect the encoding of + # @return [String] the detected encoding + # + def self.detect_encoding(str) + CharDet.detect(str)&.dig('encoding') || Encoding::BINARY.name + end + + # Normalizes the encoding to normalize_to + # + # @example + # EncodingNormalizer.normalize("Hello, world!") #=> "Hello, world!" 
+ # EncodingNormalizer.normalize("\xCB\xEF\xF1\xE5\xEC") #=> "Λορεμ" + # EncodingNormalizer.normalize("\xC0\xCC\xB0\xCD\xC0\xBA") #=> "이것은" + # + # @param str [String] the string to normalize + # @param normalize_to [String] the name of the encoding to normalize to + # + # @return [String] the string with encoding converted to normalize_to + # + # @raise [Encoding::UndefinedConversionError] if the string cannot be converted to the default encoding + # + def self.normalize(str, normalize_to: Encoding::UTF_8.name) + encoding_options = { invalid: :replace, undef: :replace } + + detected_encoding = detect_encoding(str) + + return str if str.valid_encoding? && detected_encoding == normalize_to + + str.encode(normalize_to, detected_encoding, **encoding_options) + end + end + end +end diff --git a/lib/ruby_git/command_line/runner.rb b/lib/ruby_git/command_line/runner.rb index 03fa66d..ef426f5 100644 --- a/lib/ruby_git/command_line/runner.rb +++ b/lib/ruby_git/command_line/runner.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true +require_relative 'encoding_normalizer' require_relative 'result' + require 'ruby_git/errors' module RubyGit @@ -303,7 +305,7 @@ def process_output(output, result) output = if result.options.normalize_encoding - output.lines.map { |l| RubyGit::EncodingNormalizer.normalize(l) }.join + output.lines.map { |l| EncodingNormalizer.normalize(l) }.join else output.dup end diff --git a/lib/ruby_git/encoding_normalizer.rb b/lib/ruby_git/encoding_normalizer.rb deleted file mode 100644 index 77f9aa0..0000000 --- a/lib/ruby_git/encoding_normalizer.rb +++ /dev/null @@ -1,49 +0,0 @@ -# frozen_string_literal: true - -require 'rchardet' - -module RubyGit - # Utility to normalize string encoding - # @api public - module EncodingNormalizer - # Detects the character encoding used to create a string or binary data - # - # Detects the encoding of a string or return binary if it cannot be detected - # - # @example - # RubyGit::EncodingNormalizer.detect_encoding("Hello, 
world!") #=> "ascii" - # RubyGit::EncodingNormalizer.detect_encoding("\xCB\xEF\xF1\xE5\xEC") #=> "ISO-8859-7" - # RubyGit::EncodingNormalizer.detect_encoding("\xC0\xCC\xB0\xCD\xC0\xBA") #=> "EUC-KR" - # - # @param str [String] the string to detect the encoding of - # @return [String] the detected encoding - # - def self.detect_encoding(str) - CharDet.detect(str)&.dig('encoding') || Encoding::BINARY.name - end - - # Normalizes the encoding to normalize_to - # - # @example - # RubyGit::EncodingNormalizer.normalize("Hello, world!") #=> "Hello, world!" - # RubyGit::EncodingNormalizer.normalize("\xCB\xEF\xF1\xE5\xEC") #=> "Λορεμ" - # RubyGit::EncodingNormalizer.normalize("\xC0\xCC\xB0\xCD\xC0\xBA") #=> "이것은" - # - # @param str [String] the string to normalize - # @param normalize_to [String] the name of the encoding to normalize to - # - # @return [String] the string with encoding converted to normalize_to - # - # @raise [Encoding::UndefinedConversionError] if the string cannot be converted to the default encoding - # - def self.normalize(str, normalize_to: Encoding::UTF_8.name) - encoding_options = { invalid: :replace, undef: :replace } - - detected_encoding = detect_encoding(str) - - return str if str.valid_encoding? && detected_encoding == normalize_to - - str.encode(normalize_to, detected_encoding, **encoding_options) - end - end -end