Skip to content

Commit 62c959f

Browse files
authored
perf: 优化历史数据分词处理兜底操作 (#313)
1 parent a788d8f commit 62c959f

File tree

1 file changed

+24
-21
lines changed

1 file changed

+24
-21
lines changed

apps/embedding/migrations/0002_embedding_search_vector.py

Lines changed: 24 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -18,27 +18,30 @@ def update_embedding_search_vector(embedding, paragraph_list):
18 18

19 19

20 20
def save_keywords(apps, schema_editor):
21-
document = apps.get_model("dataset", "Document")
22-
embedding = apps.get_model("embedding", "Embedding")
23-
paragraph = apps.get_model('dataset', 'Paragraph')
24-
db_alias = schema_editor.connection.alias
25-
document_list = document.objects.using(db_alias).all()
26-
for document in document_list:
27-
document.status = Status.embedding
28-
document.save()
29-
paragraph_list = paragraph.objects.using(db_alias).filter(document=document).all()
30-
embedding_list = embedding.objects.using(db_alias).filter(document=document).values('id', 'search_vector',
31-
'paragraph')
32-
embedding_update_list = [update_embedding_search_vector(embedding, paragraph_list) for embedding
33-
in embedding_list]
34-
child_array = sub_array(embedding_update_list, 50)
35-
for c in child_array:
36-
try:
37-
embedding.objects.using(db_alias).bulk_update(c, ['search_vector'])
38-
except Exception as e:
39-
print(e)
40-
document.status = Status.success
41-
document.save()
21+
try:
22+
document = apps.get_model("dataset", "Document")
23+
embedding = apps.get_model("embedding", "Embedding")
24+
paragraph = apps.get_model('dataset', 'Paragraph')
25+
db_alias = schema_editor.connection.alias
26+
document_list = document.objects.using(db_alias).all()
27+
for document in document_list:
28+
document.status = Status.embedding
29+
document.save()
30+
paragraph_list = paragraph.objects.using(db_alias).filter(document=document).all()
31+
embedding_list = embedding.objects.using(db_alias).filter(document=document).values('id', 'search_vector',
32+
'paragraph')
33+
embedding_update_list = [update_embedding_search_vector(embedding, paragraph_list) for embedding
34+
in embedding_list]
35+
child_array = sub_array(embedding_update_list, 50)
36+
for c in child_array:
37+
try:
38+
embedding.objects.using(db_alias).bulk_update(c, ['search_vector'])
39+
except Exception as e:
40+
print(e)
41+
document.status = Status.success
42+
document.save()
43+
except Exception as e:
44+
print(e)
42 45

43 46

44 47
class Migration(migrations.Migration):

0 commit comments

Comments
 (0)