Skip to content

Commit 860e69d

Browse files
committed
Fix: Incorrect Binary Search output for duplicate elements (#13886, #13840)
1 parent ae68a78 commit 860e69d

File tree

6 files changed, +97 -33 lines changed

matrix/binary_search_matrix.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,10 +9,20 @@ def binary_search(array: list, lower_bound: int, upper_bound: int, value: int) -
99
0
1010
>>> binary_search(matrix, 0, len(matrix) - 1, 23)
1111
-1
12+
>>> matrix_dup = [1, 4, 4, 4, 7, 11, 15]
13+
>>> binary_search(matrix_dup, 0, len(matrix_dup) - 1, 4)
14+
1
15+
>>> binary_search(matrix_dup, 0, len(matrix_dup) - 1, 7)
16+
4
17+
>>> binary_search(matrix_dup, 0, len(matrix_dup) - 1, 0)
18+
-1
1219
"""
1320

14-
r = int((lower_bound + upper_bound) // 2)
21+
r = (lower_bound + upper_bound) // 2
1522
if array[r] == value:
23+
# Move left to find the first occurrence of duplicates
24+
while r > 0 and array[r - 1] == value:
25+
r -= 1
1626
return r
1727
if lower_bound >= upper_bound:
1828
return -1

other/number_container_system.py

Lines changed: 25 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,10 @@ def binary_search_delete(self, array: list | str | range, item: int) -> list[int
5050
Traceback (most recent call last):
5151
...
5252
TypeError: binary_search_delete() only accepts either a list, range or str
53+
>>> NumberContainer().binary_search_delete([1,2,2,3], 2)
54+
[1, 2, 3]
55+
>>> NumberContainer().binary_search_delete([0,0,1,1,1], 1)
56+
[0, 0, 1, 1]
5357
"""
5458
if isinstance(array, (range, str)):
5559
array = list(array)
@@ -60,19 +64,27 @@ def binary_search_delete(self, array: list | str | range, item: int) -> list[int
6064

6165
low = 0
6266
high = len(array) - 1
67+
result_index = -1
6368

69+
# Find the first occurrence of `item`
6470
while low <= high:
6571
mid = (low + high) // 2
66-
if array[mid] == item:
67-
array.pop(mid)
68-
return array
69-
elif array[mid] < item:
72+
if array[mid] < item:
7073
low = mid + 1
74+
elif array[mid] > item:
75+
high = mid - 1
7176
else:
77+
# Found the item, move left to find the first occurrence
78+
result_index = mid
7279
high = mid - 1
73-
raise ValueError(
74-
"Either the item is not in the array or the array was unsorted"
75-
)
80+
81+
if result_index == -1:
82+
raise ValueError(
83+
"Either the item is not in the array or the array was unsorted"
84+
)
85+
86+
array.pop(result_index)
87+
return array
7688

7789
def binary_search_insert(self, array: list | str | range, index: int) -> list[int]:
7890
"""
@@ -95,6 +107,10 @@ def binary_search_insert(self, array: list | str | range, index: int) -> list[in
95107
Traceback (most recent call last):
96108
...
97109
TypeError: binary_search_insert() only accepts either a list, range or str
110+
>>> NumberContainer().binary_search_insert([1,2,2,3], 2)
111+
[1, 2, 2, 2, 3]
112+
>>> NumberContainer().binary_search_insert([0,0,1,1], 1)
113+
[0, 0, 1, 1, 1]
98114
"""
99115
if isinstance(array, (range, str)):
100116
array = list(array)
@@ -106,19 +122,14 @@ def binary_search_insert(self, array: list | str | range, index: int) -> list[in
106122
low = 0
107123
high = len(array) - 1
108124

125+
# Find the correct insertion position
109126
while low <= high:
110127
mid = (low + high) // 2
111-
if array[mid] == index:
112-
# If the item already exists in the array,
113-
# insert it after the existing item
114-
array.insert(mid + 1, index)
115-
return array
116-
elif array[mid] < index:
128+
if array[mid] < index:
117129
low = mid + 1
118130
else:
119131
high = mid - 1
120132

121-
# If the item doesn't exist in the array, insert it at the appropriate position
122133
array.insert(low, index)
123134
return array
124135

searches/binary_search.py

Lines changed: 29 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -197,21 +197,28 @@ def binary_search(sorted_collection: list[int], item: int) -> int:
197197
1
198198
>>> binary_search([0, 5, 7, 10, 15], 6)
199199
-1
200+
>>> binary_search([1, 2, 2, 2, 3], 2)
201+
2
202+
>>> binary_search([4, 4, 4, 4], 4)
203+
1
200204
"""
201205
if list(sorted_collection) != sorted(sorted_collection):
202206
raise ValueError("sorted_collection must be sorted in ascending order")
207+
203208
left = 0
204209
right = len(sorted_collection) - 1
205210

206211
while left <= right:
207212
midpoint = left + (right - left) // 2
208213
current_item = sorted_collection[midpoint]
214+
209215
if current_item == item:
210-
return midpoint
211-
elif item < current_item:
212-
right = midpoint - 1
213-
else:
216+
return midpoint # with duplicates this may be any matching index, not necessarily the first
217+
elif current_item < item:
214218
left = midpoint + 1
219+
else:
220+
right = midpoint - 1
221+
215222
return -1
216223

217224

@@ -234,12 +241,17 @@ def binary_search_std_lib(sorted_collection: list[int], item: int) -> int:
234241
1
235242
>>> binary_search_std_lib([0, 5, 7, 10, 15], 6)
236243
-1
244+
>>> binary_search_std_lib([1, 2, 2, 2, 3], 2)
245+
1
246+
>>> binary_search_std_lib([4, 4, 4, 4], 4)
247+
0
237248
"""
238249
if list(sorted_collection) != sorted(sorted_collection):
239250
raise ValueError("sorted_collection must be sorted in ascending order")
251+
240252
index = bisect.bisect_left(sorted_collection, item)
241253
if index != len(sorted_collection) and sorted_collection[index] == item:
242-
return index
254+
return index # bisect_left handles duplicates correctly
243255
return -1
244256

245257

@@ -265,23 +277,30 @@ def binary_search_by_recursion(
265277
1
266278
>>> binary_search_by_recursion([0, 5, 7, 10, 15], 6, 0, 4)
267279
-1
280+
>>> binary_search_by_recursion([1, 2, 2, 2, 3], 2, 0, 4)
281+
2
282+
>>> binary_search_by_recursion([4, 4, 4, 4], 4, 0, 3)
283+
1
268284
"""
269285
if right < 0:
270286
right = len(sorted_collection) - 1
287+
271288
if list(sorted_collection) != sorted(sorted_collection):
272289
raise ValueError("sorted_collection must be sorted in ascending order")
273-
if right < left:
290+
291+
if left > right:
274292
return -1
275293

276294
midpoint = left + (right - left) // 2
295+
mid_value = sorted_collection[midpoint]
277296

278-
if sorted_collection[midpoint] == item:
279-
return midpoint
280-
elif sorted_collection[midpoint] > item:
297+
if mid_value == item:
298+
return midpoint # valid index even with duplicates
299+
elif mid_value > item:
281300
return binary_search_by_recursion(sorted_collection, item, left, midpoint - 1)
282301
else:
283302
return binary_search_by_recursion(sorted_collection, item, midpoint + 1, right)
284-
303+
285304

286305
def exponential_search(sorted_collection: list[int], item: int) -> int:
287306
"""Pure implementation of an exponential search algorithm in Python

searches/exponential_search.py

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,10 @@ def binary_search_by_recursion(
3939
1
4040
>>> binary_search_by_recursion([0, 5, 7, 10, 15], 6, 0, 4)
4141
-1
42+
>>> binary_search_by_recursion([1, 2, 2, 2, 3], 2, 0, 4)
43+
1
44+
>>> binary_search_by_recursion([2, 2, 2, 2], 2, 0, 3)
45+
0
4246
"""
4347
if right < 0:
4448
right = len(sorted_collection) - 1
@@ -48,10 +52,15 @@ def binary_search_by_recursion(
4852
return -1
4953

5054
midpoint = left + (right - left) // 2
51-
52-
if sorted_collection[midpoint] == item:
53-
return midpoint
54-
elif sorted_collection[midpoint] > item:
55+
mid_value = sorted_collection[midpoint]
56+
57+
if mid_value == item:
58+
# check if this is the first occurrence
59+
if midpoint == 0 or sorted_collection[midpoint - 1] < item:
60+
return midpoint
61+
else:
62+
return binary_search_by_recursion(sorted_collection, item, left, midpoint - 1)
63+
elif mid_value > item:
5564
return binary_search_by_recursion(sorted_collection, item, left, midpoint - 1)
5665
else:
5766
return binary_search_by_recursion(sorted_collection, item, midpoint + 1, right)

searches/simple_binary_search.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,14 @@ def binary_search(a_list: list[int], item: int) -> bool:
4040
False
4141
>>> binary_search(range(0, 10000, 5), 2)
4242
False
43+
>>> binary_search([1, 1, 1, 2, 2, 3], 1)
44+
True
45+
>>> binary_search([1, 1, 1, 2, 2, 3], 2)
46+
True
47+
>>> binary_search([1, 1, 1, 2, 2, 3], 3)
48+
True
49+
>>> binary_search([1, 1, 1, 2, 2, 3], 4)
50+
False
4351
"""
4452
if len(a_list) == 0:
4553
return False

sorts/tim_sort.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,24 @@
11
def binary_search(lst, item, start, end):
2-
if start == end:
3-
return start if lst[start] > item else start + 1
2+
"""
3+
Binary search that returns the index of the first occurrence of `item` if present,
4+
or the correct insertion index if not present, even with duplicates.
5+
"""
46
if start > end:
57
return start
68

79
mid = (start + end) // 2
10+
811
if lst[mid] < item:
912
return binary_search(lst, item, mid + 1, end)
1013
elif lst[mid] > item:
1114
return binary_search(lst, item, start, mid - 1)
1215
else:
13-
return mid
14-
16+
# Move left to find the first occurrence
17+
if mid == start or lst[mid - 1] != item:
18+
return mid
19+
else:
20+
return binary_search(lst, item, start, mid - 1)
21+
1522

1623
def insertion_sort(lst):
1724
length = len(lst)

0 commit comments

Comments
 (0)