
Commit 1be3e26

updates for 0.4.1 (#26)
* Add additional typing
* SearchMethodInput configuration for ret_k
1 parent c376ef8 commit 1be3e26

File tree

16 files changed: +2581 -14519 lines changed


docs/examples/bayesian_optimization/00_bayes_study.ipynb

Lines changed: 415 additions & 1171 deletions
Large diffs are not rendered by default.

docs/examples/grid_study/00_grid_study.ipynb

Lines changed: 1266 additions & 11200 deletions
Large diffs are not rendered by default.

docs/examples/grid_study/01_custom_grid_study.ipynb

Lines changed: 72 additions & 150 deletions
@@ -35,6 +35,37 @@
 "%pip install redis-retrieval-optimizer"
 ]
 },
+{
+"cell_type": "markdown",
+"id": "a498afe9",
+"metadata": {},
+"source": [
+"## Check version"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 1,
+"id": "5eea1c17",
+"metadata": {},
+"outputs": [
+{
+"data": {
+"text/plain": [
+"'0.4.1'"
+]
+},
+"execution_count": 1,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"import redis_retrieval_optimizer\n",
+"\n",
+"redis_retrieval_optimizer.__version__"
+]
+},
 {
 "cell_type": "markdown",
 "id": "270a4f1b",
@@ -45,7 +76,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": null,
+"execution_count": 2,
 "id": "b66894d7",
 "metadata": {},
 "outputs": [],
@@ -246,12 +277,14 @@
 "def gather_pre_filter_results(search_method_input: SearchMethodInput) -> SearchMethodOutput:\n",
 " redis_res_vector = {}\n",
 "\n",
-" for key in search_method_input.raw_queries:\n",
-" query_info = search_method_input.raw_queries[key]\n",
-" query = pre_filter_query(query_info, 10, search_method_input.emb_model)\n",
+" for key, query_info in search_method_input.raw_queries.items():\n",
+"\n",
+" query = pre_filter_query(query_info, search_method_input.ret_k, search_method_input.emb_model)\n",
+"\n",
 " res = run_search_w_time(\n",
 " search_method_input.index, query, search_method_input.query_metrics\n",
 " )\n",
+"\n",
 " score_dict = make_score_dict_vec(res, id_field_name=\"_id\")\n",
 "\n",
 " redis_res_vector[key] = score_dict\n",
@@ -265,12 +298,16 @@
 "def gather_vector_results(search_method_input: SearchMethodInput) -> SearchMethodOutput:\n",
 " redis_res_vector = {}\n",
 "\n",
-" for key in search_method_input.raw_queries:\n",
-" text_query = search_method_input.raw_queries[key]\n",
-" vec_query = vector_query(text_query, 10, search_method_input.emb_model)\n",
+" for key, text_query in search_method_input.raw_queries.items():\n",
+" # create query\n",
+" vec_query = vector_query(text_query, search_method_input.ret_k, search_method_input.emb_model)\n",
+"\n",
+" # run with timing helper\n",
 " res = run_search_w_time(\n",
 " search_method_input.index, vec_query, search_method_input.query_metrics\n",
 " )\n",
+"\n",
+" # format scores dict for ranx evaluation\n",
 " score_dict = make_score_dict_vec(res, id_field_name=\"_id\")\n",
 " redis_res_vector[key] = score_dict\n",
 " \n",
@@ -335,133 +372,18 @@
 "id": "cc56171b",
 "metadata": {},
 "outputs": [
-{
-"name": "stderr",
-"output_type": "stream",
-"text": [
-"/Users/tyler.hutcherson/Library/Caches/pypoetry/virtualenvs/redis-retrieval-optimizer-Z5sMIYJj-py3.11/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-" from .autonotebook import tqdm as notebook_tqdm\n"
-]
-},
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"09:56:39 datasets INFO PyTorch version 2.7.0 available.\n",
-"09:56:40 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps\n",
-"09:56:40 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2\n"
-]
-},
-{
-"name": "stderr",
-"output_type": "stream",
-"text": [
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 4.18it/s]\n"
-]
-},
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"Recreating: loading corpus from file\n"
-]
-},
-{
-"name": "stderr",
-"output_type": "stream",
-"text": [
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 2.60it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 36.69it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 35.04it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 34.18it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 35.12it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 33.56it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 34.25it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 34.53it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 35.27it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 35.48it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 36.56it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 33.55it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 35.90it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 35.01it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 35.20it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 33.45it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 35.65it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 33.36it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 34.15it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 33.05it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 34.01it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 33.60it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 34.79it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 34.81it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 32.98it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 18.21it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 34.96it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 33.50it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 36.06it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 34.57it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 33.98it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 34.42it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 33.87it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 35.03it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 34.14it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 34.35it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 36.25it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 32.23it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 36.36it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 34.40it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 35.12it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 35.41it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 34.67it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 34.36it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 33.65it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 32.82it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 4.29it/s]\n"
-]
-},
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"09:56:43 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps\n",
-"09:56:43 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2\n"
-]
-},
-{
-"name": "stderr",
-"output_type": "stream",
-"text": [
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 55.47it/s]\n"
-]
-},
-{
-"name": "stdout",
-"output_type": "stream",
-"text": [
-"Running search method: basic_vector\n"
-]
-},
-{
-"name": "stderr",
-"output_type": "stream",
-"text": [
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 9.15it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 13.11it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 13.83it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 13.65it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 85.35it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 13.78it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 76.28it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 82.05it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 73.41it/s]\n",
-"Batches: 100%|██████████| 1/1 [00:00<00:00, 72.11it/s]\n"
-]
-},
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"Running search method: pre_filter_vector\n"
+"14:59:00 datasets INFO PyTorch version 2.3.0 available.\n",
+"14:59:00 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps\n",
+"14:59:00 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2\n",
+"Recreating: loading corpus from file\n",
+"14:59:08 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps\n",
+"14:59:08 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2\n",
+"Running search method: basic_vector with dtype: float32\n",
+"Running search method: pre_filter_vector with dtype: float32\n"
 ]
 }
 ],
@@ -490,7 +412,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 9,
+"execution_count": 10,
 "id": "47ef7edc",
 "metadata": {},
 "outputs": [
@@ -518,57 +440,57 @@
 " <th>search_method</th>\n",
 " <th>model</th>\n",
 " <th>avg_query_time</th>\n",
-" <th>recall@k</th>\n",
+" <th>recall</th>\n",
 " <th>precision</th>\n",
-" <th>ndcg@k</th>\n",
+" <th>f1</th>\n",
 " </tr>\n",
 " </thead>\n",
 " <tbody>\n",
 " <tr>\n",
 " <th>1</th>\n",
 " <td>pre_filter_vector</td>\n",
 " <td>sentence-transformers/all-MiniLM-L6-v2</td>\n",
-" <td>0.001590</td>\n",
-" <td>1.0</td>\n",
-" <td>0.25</td>\n",
-" <td>0.914903</td>\n",
+" <td>0.000536</td>\n",
+" <td>1.000000</td>\n",
+" <td>0.416667</td>\n",
+" <td>0.553810</td>\n",
 " </tr>\n",
 " <tr>\n",
 " <th>0</th>\n",
 " <td>basic_vector</td>\n",
 " <td>sentence-transformers/all-MiniLM-L6-v2</td>\n",
-" <td>0.002136</td>\n",
-" <td>0.9</td>\n",
-" <td>0.23</td>\n",
-" <td>0.717676</td>\n",
+" <td>0.001578</td>\n",
+" <td>0.866667</td>\n",
+" <td>0.350000</td>\n",
+" <td>0.470476</td>\n",
 " </tr>\n",
 " </tbody>\n",
 "</table>\n",
 "</div>"
 ],
 "text/plain": [
 " search_method model avg_query_time \\\n",
-"1 pre_filter_vector sentence-transformers/all-MiniLM-L6-v2 0.001590 \n",
-"0 basic_vector sentence-transformers/all-MiniLM-L6-v2 0.002136 \n",
+"1 pre_filter_vector sentence-transformers/all-MiniLM-L6-v2 0.000536 \n",
+"0 basic_vector sentence-transformers/all-MiniLM-L6-v2 0.001578 \n",
 "\n",
-" recall@k precision ndcg@k \n",
-"1 1.0 0.25 0.914903 \n",
-"0 0.9 0.23 0.717676 "
+" recall precision f1 \n",
+"1 1.000000 0.416667 0.553810 \n",
+"0 0.866667 0.350000 0.470476 "
 ]
 },
-"execution_count": 9,
+"execution_count": 10,
 "metadata": {},
 "output_type": "execute_result"
 }
 ],
 "source": [
-"metrics[[\"search_method\", \"model\", \"avg_query_time\", \"recall\", \"precision\", \"ndcg\"]].sort_values(by=\"ndcg\", ascending=False)"
+"metrics[[\"search_method\", \"model\", \"avg_query_time\", \"recall\", \"precision\", \"f1\"]].sort_values(by=\"f1\", ascending=False)"
 ]
 }
 ],
 "metadata": {
 "kernelspec": {
-"display_name": "redis-retrieval-optimizer-Z5sMIYJj-py3.11",
+"display_name": "Python 3",
 "language": "python",
 "name": "python3"
 },
@@ -582,7 +504,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.11.11"
+"version": "3.11.9"
 }
 },
 "nbformat": 4,

0 commit comments
