|
35 | 35 | "%pip install redis-retrieval-optimizer" |
36 | 36 | ] |
37 | 37 | }, |
| 38 | + { |
| 39 | + "cell_type": "markdown", |
| 40 | + "id": "a498afe9", |
| 41 | + "metadata": {}, |
| 42 | + "source": [ |
| 43 | + "## Check version" |
| 44 | + ] |
| 45 | + }, |
| 46 | + { |
| 47 | + "cell_type": "code", |
| 48 | + "execution_count": 1, |
| 49 | + "id": "5eea1c17", |
| 50 | + "metadata": {}, |
| 51 | + "outputs": [ |
| 52 | + { |
| 53 | + "data": { |
| 54 | + "text/plain": [ |
| 55 | + "'0.4.1'" |
| 56 | + ] |
| 57 | + }, |
| 58 | + "execution_count": 1, |
| 59 | + "metadata": {}, |
| 60 | + "output_type": "execute_result" |
| 61 | + } |
| 62 | + ], |
| 63 | + "source": [ |
| 64 | + "import redis_retrieval_optimizer\n", |
| 65 | + "\n", |
| 66 | + "redis_retrieval_optimizer.__version__" |
| 67 | + ] |
| 68 | + }, |
38 | 69 | { |
39 | 70 | "cell_type": "markdown", |
40 | 71 | "id": "270a4f1b", |
|
45 | 76 | }, |
46 | 77 | { |
47 | 78 | "cell_type": "code", |
48 | | - "execution_count": null, |
| 79 | + "execution_count": 2, |
49 | 80 | "id": "b66894d7", |
50 | 81 | "metadata": {}, |
51 | 82 | "outputs": [], |
|
246 | 277 | "def gather_pre_filter_results(search_method_input: SearchMethodInput) -> SearchMethodOutput:\n", |
247 | 278 | " redis_res_vector = {}\n", |
248 | 279 | "\n", |
249 | | - " for key in search_method_input.raw_queries:\n", |
250 | | - " query_info = search_method_input.raw_queries[key]\n", |
251 | | - " query = pre_filter_query(query_info, 10, search_method_input.emb_model)\n", |
| 280 | + " for key, query_info in search_method_input.raw_queries.items():\n", |
| 281 | + "\n", |
| 282 | + " query = pre_filter_query(query_info, search_method_input.ret_k, search_method_input.emb_model)\n", |
| 283 | + "\n", |
252 | 284 | " res = run_search_w_time(\n", |
253 | 285 | " search_method_input.index, query, search_method_input.query_metrics\n", |
254 | 286 | " )\n", |
| 287 | + "\n", |
255 | 288 | " score_dict = make_score_dict_vec(res, id_field_name=\"_id\")\n", |
256 | 289 | "\n", |
257 | 290 | " redis_res_vector[key] = score_dict\n", |
|
265 | 298 | "def gather_vector_results(search_method_input: SearchMethodInput) -> SearchMethodOutput:\n", |
266 | 299 | " redis_res_vector = {}\n", |
267 | 300 | "\n", |
268 | | - " for key in search_method_input.raw_queries:\n", |
269 | | - " text_query = search_method_input.raw_queries[key]\n", |
270 | | - " vec_query = vector_query(text_query, 10, search_method_input.emb_model)\n", |
| 301 | + " for key, text_query in search_method_input.raw_queries.items():\n", |
| 302 | + " # create query\n", |
| 303 | + " vec_query = vector_query(text_query, search_method_input.ret_k, search_method_input.emb_model)\n", |
| 304 | + "\n", |
| 305 | + " # run with timing helper\n", |
271 | 306 | " res = run_search_w_time(\n", |
272 | 307 | " search_method_input.index, vec_query, search_method_input.query_metrics\n", |
273 | 308 | " )\n", |
| 309 | + "\n", |
| 310 | + " # format scores dict for ranx evaluation\n", |
274 | 311 | " score_dict = make_score_dict_vec(res, id_field_name=\"_id\")\n", |
275 | 312 | " redis_res_vector[key] = score_dict\n", |
276 | 313 | " \n", |
|
335 | 372 | "id": "cc56171b", |
336 | 373 | "metadata": {}, |
337 | 374 | "outputs": [ |
338 | | - { |
339 | | - "name": "stderr", |
340 | | - "output_type": "stream", |
341 | | - "text": [ |
342 | | - "/Users/tyler.hutcherson/Library/Caches/pypoetry/virtualenvs/redis-retrieval-optimizer-Z5sMIYJj-py3.11/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", |
343 | | - " from .autonotebook import tqdm as notebook_tqdm\n" |
344 | | - ] |
345 | | - }, |
346 | | - { |
347 | | - "name": "stdout", |
348 | | - "output_type": "stream", |
349 | | - "text": [ |
350 | | - "09:56:39 datasets INFO PyTorch version 2.7.0 available.\n", |
351 | | - "09:56:40 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps\n", |
352 | | - "09:56:40 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2\n" |
353 | | - ] |
354 | | - }, |
355 | | - { |
356 | | - "name": "stderr", |
357 | | - "output_type": "stream", |
358 | | - "text": [ |
359 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 4.18it/s]\n" |
360 | | - ] |
361 | | - }, |
362 | | - { |
363 | | - "name": "stdout", |
364 | | - "output_type": "stream", |
365 | | - "text": [ |
366 | | - "Recreating: loading corpus from file\n" |
367 | | - ] |
368 | | - }, |
369 | | - { |
370 | | - "name": "stderr", |
371 | | - "output_type": "stream", |
372 | | - "text": [ |
373 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 2.60it/s]\n", |
374 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 36.69it/s]\n", |
375 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 35.04it/s]\n", |
376 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 34.18it/s]\n", |
377 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 35.12it/s]\n", |
378 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 33.56it/s]\n", |
379 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 34.25it/s]\n", |
380 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 34.53it/s]\n", |
381 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 35.27it/s]\n", |
382 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 35.48it/s]\n", |
383 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 36.56it/s]\n", |
384 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 33.55it/s]\n", |
385 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 35.90it/s]\n", |
386 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 35.01it/s]\n", |
387 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 35.20it/s]\n", |
388 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 33.45it/s]\n", |
389 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 35.65it/s]\n", |
390 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 33.36it/s]\n", |
391 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 34.15it/s]\n", |
392 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 33.05it/s]\n", |
393 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 34.01it/s]\n", |
394 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 33.60it/s]\n", |
395 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 34.79it/s]\n", |
396 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 34.81it/s]\n", |
397 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 32.98it/s]\n", |
398 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 18.21it/s]\n", |
399 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 34.96it/s]\n", |
400 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 33.50it/s]\n", |
401 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 36.06it/s]\n", |
402 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 34.57it/s]\n", |
403 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 33.98it/s]\n", |
404 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 34.42it/s]\n", |
405 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 33.87it/s]\n", |
406 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 35.03it/s]\n", |
407 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 34.14it/s]\n", |
408 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 34.35it/s]\n", |
409 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 36.25it/s]\n", |
410 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 32.23it/s]\n", |
411 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 36.36it/s]\n", |
412 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 34.40it/s]\n", |
413 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 35.12it/s]\n", |
414 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 35.41it/s]\n", |
415 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 34.67it/s]\n", |
416 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 34.36it/s]\n", |
417 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 33.65it/s]\n", |
418 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 32.82it/s]\n", |
419 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 4.29it/s]\n" |
420 | | - ] |
421 | | - }, |
422 | | - { |
423 | | - "name": "stdout", |
424 | | - "output_type": "stream", |
425 | | - "text": [ |
426 | | - "09:56:43 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps\n", |
427 | | - "09:56:43 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2\n" |
428 | | - ] |
429 | | - }, |
430 | | - { |
431 | | - "name": "stderr", |
432 | | - "output_type": "stream", |
433 | | - "text": [ |
434 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 55.47it/s]\n" |
435 | | - ] |
436 | | - }, |
437 | | - { |
438 | | - "name": "stdout", |
439 | | - "output_type": "stream", |
440 | | - "text": [ |
441 | | - "Running search method: basic_vector\n" |
442 | | - ] |
443 | | - }, |
444 | | - { |
445 | | - "name": "stderr", |
446 | | - "output_type": "stream", |
447 | | - "text": [ |
448 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 9.15it/s]\n", |
449 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 13.11it/s]\n", |
450 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 13.83it/s]\n", |
451 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 13.65it/s]\n", |
452 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 85.35it/s]\n", |
453 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 13.78it/s]\n", |
454 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 76.28it/s]\n", |
455 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 82.05it/s]\n", |
456 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 73.41it/s]\n", |
457 | | - "Batches: 100%|██████████| 1/1 [00:00<00:00, 72.11it/s]\n" |
458 | | - ] |
459 | | - }, |
460 | 375 | { |
461 | 376 | "name": "stdout", |
462 | 377 | "output_type": "stream", |
463 | 378 | "text": [ |
464 | | - "Running search method: pre_filter_vector\n" |
| 379 | + "14:59:00 datasets INFO PyTorch version 2.3.0 available.\n", |
| 380 | + "14:59:00 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps\n", |
| 381 | + "14:59:00 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2\n", |
| 382 | + "Recreating: loading corpus from file\n", |
| 383 | + "14:59:08 sentence_transformers.SentenceTransformer INFO Use pytorch device_name: mps\n", |
| 384 | + "14:59:08 sentence_transformers.SentenceTransformer INFO Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2\n", |
| 385 | + "Running search method: basic_vector with dtype: float32\n", |
| 386 | + "Running search method: pre_filter_vector with dtype: float32\n" |
465 | 387 | ] |
466 | 388 | } |
467 | 389 | ], |
|
490 | 412 | }, |
491 | 413 | { |
492 | 414 | "cell_type": "code", |
493 | | - "execution_count": 9, |
| 415 | + "execution_count": 10, |
494 | 416 | "id": "47ef7edc", |
495 | 417 | "metadata": {}, |
496 | 418 | "outputs": [ |
|
518 | 440 | " <th>search_method</th>\n", |
519 | 441 | " <th>model</th>\n", |
520 | 442 | " <th>avg_query_time</th>\n", |
521 | | - " <th>recall@k</th>\n", |
| 443 | + " <th>recall</th>\n", |
522 | 444 | " <th>precision</th>\n", |
523 | | - " <th>ndcg@k</th>\n", |
| 445 | + " <th>f1</th>\n", |
524 | 446 | " </tr>\n", |
525 | 447 | " </thead>\n", |
526 | 448 | " <tbody>\n", |
527 | 449 | " <tr>\n", |
528 | 450 | " <th>1</th>\n", |
529 | 451 | " <td>pre_filter_vector</td>\n", |
530 | 452 | " <td>sentence-transformers/all-MiniLM-L6-v2</td>\n", |
531 | | - " <td>0.001590</td>\n", |
532 | | - " <td>1.0</td>\n", |
533 | | - " <td>0.25</td>\n", |
534 | | - " <td>0.914903</td>\n", |
| 453 | + " <td>0.000536</td>\n", |
| 454 | + " <td>1.000000</td>\n", |
| 455 | + " <td>0.416667</td>\n", |
| 456 | + " <td>0.553810</td>\n", |
535 | 457 | " </tr>\n", |
536 | 458 | " <tr>\n", |
537 | 459 | " <th>0</th>\n", |
538 | 460 | " <td>basic_vector</td>\n", |
539 | 461 | " <td>sentence-transformers/all-MiniLM-L6-v2</td>\n", |
540 | | - " <td>0.002136</td>\n", |
541 | | - " <td>0.9</td>\n", |
542 | | - " <td>0.23</td>\n", |
543 | | - " <td>0.717676</td>\n", |
| 462 | + " <td>0.001578</td>\n", |
| 463 | + " <td>0.866667</td>\n", |
| 464 | + " <td>0.350000</td>\n", |
| 465 | + " <td>0.470476</td>\n", |
544 | 466 | " </tr>\n", |
545 | 467 | " </tbody>\n", |
546 | 468 | "</table>\n", |
547 | 469 | "</div>" |
548 | 470 | ], |
549 | 471 | "text/plain": [ |
550 | 472 | " search_method model avg_query_time \\\n", |
551 | | - "1 pre_filter_vector sentence-transformers/all-MiniLM-L6-v2 0.001590 \n", |
552 | | - "0 basic_vector sentence-transformers/all-MiniLM-L6-v2 0.002136 \n", |
| 473 | + "1 pre_filter_vector sentence-transformers/all-MiniLM-L6-v2 0.000536 \n", |
| 474 | + "0 basic_vector sentence-transformers/all-MiniLM-L6-v2 0.001578 \n", |
553 | 475 | "\n", |
554 | | - " recall@k precision ndcg@k \n", |
555 | | - "1 1.0 0.25 0.914903 \n", |
556 | | - "0 0.9 0.23 0.717676 " |
| 476 | + " recall precision f1 \n", |
| 477 | + "1 1.000000 0.416667 0.553810 \n", |
| 478 | + "0 0.866667 0.350000 0.470476 " |
557 | 479 | ] |
558 | 480 | }, |
559 | | - "execution_count": 9, |
| 481 | + "execution_count": 10, |
560 | 482 | "metadata": {}, |
561 | 483 | "output_type": "execute_result" |
562 | 484 | } |
563 | 485 | ], |
564 | 486 | "source": [ |
565 | | - "metrics[[\"search_method\", \"model\", \"avg_query_time\", \"recall\", \"precision\", \"ndcg\"]].sort_values(by=\"ndcg\", ascending=False)" |
| 487 | + "metrics[[\"search_method\", \"model\", \"avg_query_time\", \"recall\", \"precision\", \"f1\"]].sort_values(by=\"f1\", ascending=False)" |
566 | 488 | ] |
567 | 489 | } |
568 | 490 | ], |
569 | 491 | "metadata": { |
570 | 492 | "kernelspec": { |
571 | | - "display_name": "redis-retrieval-optimizer-Z5sMIYJj-py3.11", |
| 493 | + "display_name": "Python 3", |
572 | 494 | "language": "python", |
573 | 495 | "name": "python3" |
574 | 496 | }, |
|
582 | 504 | "name": "python", |
583 | 505 | "nbconvert_exporter": "python", |
584 | 506 | "pygments_lexer": "ipython3", |
585 | | - "version": "3.11.11" |
| 507 | + "version": "3.11.9" |
586 | 508 | } |
587 | 509 | }, |
588 | 510 | "nbformat": 4, |
|
0 commit comments