Skip to content

Commit acf43c7

Browse files
author
Gal Ben David
committed
- fixed a bug with commits composed of non-UTF-8 text
1 parent a3f0625 commit acf43c7

File tree

8 files changed

+1455
-272
lines changed

8 files changed

+1455
-272
lines changed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setuptools.setup(
77
name='PyRepScan',
8-
version='0.5.0',
8+
version='0.5.1',
99
author='Gal Ben David',
1010
author_email='[email protected]',
1111
url='https://github.com/intsights/PyRepScan',

src/git_repository_scanner.cpp

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <pybind11/pybind11.h>
1111
#include <pybind11/stl.h>
1212
#include <taskflow/taskflow.hpp>
13+
#include <utf8/utf8.h>
1314

1415
#include "rules_manager.hpp"
1516

@@ -135,18 +136,27 @@ class GitRepositoryScanner {
135136
git_oid_fmt(current_commit_id_string, current_commit_id);
136137

137138
const git_signature * current_commit_author = git_commit_author(current_commit);
138-
std::string current_commit_message = git_commit_message(current_commit);
139+
std::string current_commit_message = utf8::replace_invalid(
140+
git_commit_message(current_commit),
141+
'?'
142+
);
139143

140144
char new_file_oid[41] = {0};
141145
git_oid_fmt(new_file_oid, &delta->new_file.id);
142146

143147
std::string current_commit_author_name = "";
144148
if (nullptr != current_commit_author->name) {
145-
current_commit_author_name = current_commit_author->name;
149+
current_commit_author_name = utf8::replace_invalid(
150+
current_commit_author->name,
151+
'?'
152+
);
146153
}
147154
std::string current_commit_author_email = "";
148155
if (nullptr != current_commit_author->email) {
149-
current_commit_author_email = current_commit_author->email;
156+
current_commit_author_email = utf8::replace_invalid(
157+
current_commit_author->email,
158+
'?'
159+
);
150160
}
151161

152162
git_time_t commit_time = git_commit_time(current_commit);

src/utf8/utf8.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// Copyright 2006 Nemanja Trifunovic
2+
3+
/*
4+
Permission is hereby granted, free of charge, to any person or organization
5+
obtaining a copy of the software and accompanying documentation covered by
6+
this license (the "Software") to use, reproduce, display, distribute,
7+
execute, and transmit the Software, and to prepare derivative works of the
8+
Software, and to permit third-parties to whom the Software is furnished to
9+
do so, all subject to the following:
10+
11+
The copyright notices in the Software and this entire statement, including
12+
the above license grant, this restriction and the following disclaimer,
13+
must be included in all copies of the Software, in whole or in part, and
14+
all derivative works of the Software, unless such copies or derivative
15+
works are solely in the form of machine-executable object code generated by
16+
a source language processor.
17+
18+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20+
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
21+
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
22+
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
23+
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24+
DEALINGS IN THE SOFTWARE.
25+
*/
26+
27+
28+
#ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
29+
#define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
30+
31+
#include "utf8/checked.h"
32+
#include "utf8/unchecked.h"
33+
34+
#endif // header guard

0 commit comments

Comments
 (0)