blob: ef8e86d442826531ac45c34a91be2cf22fd2ad6d [file] [log] [blame]
Shivam Mishra7d3876d2020-11-19 12:05:56 +05301import frappe
2from frappe.search.full_text_search import FullTextSearch
Shivam Mishra7d3876d2020-11-19 12:05:56 +05303from frappe.utils import strip_html_tags
Shivam Mishra7d3876d2020-11-19 12:05:56 +05304from whoosh.analysis import StemmingAnalyzer
Chillar Anand915b3432021-09-02 16:44:59 +05305from whoosh.fields import ID, KEYWORD, TEXT, Schema
6from whoosh.qparser import FieldsPlugin, MultifieldParser, WildcardPlugin
Shivam Mishra7d3876d2020-11-19 12:05:56 +05307from whoosh.query import Prefix
8
# TODO: Make this module-level index name obsolete.
Shivam Mishra7d3876d2020-11-19 12:05:56 +053010INDEX_NAME = "products"
11
Ankush Menat494bd9e2022-03-28 18:52:46 +053012
class ProductSearch(FullTextSearch):
    """Full-text search over published website items.

    Specialises ``FullTextSearch`` with a product-oriented schema and builds
    one index document per ``Item`` returned by ``get_all_published_items``.
    """

    def get_schema(self):
        """Return the Whoosh schema for product documents.

        Fields:
            title: stored text, boosted by 1.5 relative to other fields.
            name: stored ID (also the document id, see ``get_id``).
            path: stored ID.
            content: stored text analysed with ``StemmingAnalyzer``.
            keywords: stored, scorable, comma-separated keywords.
        """
        return Schema(
            title=TEXT(stored=True, field_boost=1.5),
            name=ID(stored=True),
            path=ID(stored=True),
            content=TEXT(stored=True, analyzer=StemmingAnalyzer()),
            keywords=KEYWORD(stored=True, scorable=True, commas=True),
        )

    def get_id(self):
        """Return the name of the schema field used as the document id."""
        return "name"

    def get_items_to_index(self):
        """Build the index documents for all published items.

        Returns:
            list: One document (see ``get_document_to_index``) per published
            item. Items whose document could not be built are left out, so
            the list never contains ``None``.
        """
        candidates = (
            self.get_document_to_index(item) for item in get_all_published_items()
        )
        return [document for document in candidates if document]

    def get_document_to_index(self, item):
        """Build the index document for a single item.

        Args:
            item (str): Name of the ``Item`` document to load.

        Returns:
            frappe._dict | None: Document with ``title``, ``name``, ``path``,
            ``content`` and ``keywords`` keys, or ``None`` when the item
            could not be loaded or converted.
        """
        try:
            item = frappe.get_doc("Item", item)

            keywords = [item.item_group, item.brand, item.website_image_alt]
            if item.has_variants and item.variant_based_on == "Item Attribute":
                keywords.extend(attr.attribute for attr in item.attributes)

            # Prefer the long web description and fall back to the plain
            # description. Default to an empty string so that items without
            # any description are still indexed by title and keywords
            # (previously ``content`` was left unbound and the item was
            # silently dropped).
            description = item.web_long_description or item.description
            content = strip_html_tags(description) if description else ""

            return frappe._dict(
                title=item.item_name,
                name=item.name,
                path=item.route,
                content=content,
                # Skip empty values so a missing group/brand/attribute does
                # not break the join.
                keywords=", ".join(keyword for keyword in keywords if keyword),
            )
        except Exception:
            # Deliberate best effort: one broken item must not abort indexing
            # of the others. NOTE(review): consider logging the failure here.
            return None

    def search(self, text, scope=None, limit=20):
        """Search from the current index

        Args:
            text (str): String to search for
            scope (str, optional): Prefix that the document id must start
                with. Defaults to None (no restriction).
            limit (int, optional): Limit number of search results. Defaults to 20.

        Returns:
            [List(_dict)]: Search results, as produced by ``parse_result``
        """
        ix = self.get_index()

        with ix.searcher() as searcher:
            parser = MultifieldParser(["title", "content", "keywords"], ix.schema)
            # Disable the ``field:term`` and wildcard query syntaxes.
            parser.remove_plugin_class(FieldsPlugin)
            parser.remove_plugin_class(WildcardPlugin)
            query = parser.parse(text)

            # ``self.id`` is not set in this class; presumably the base class
            # derives it from ``get_id()`` -- confirm against FullTextSearch.
            filter_scoped = Prefix(self.id, scope) if scope else None
            results = searcher.search(query, limit=limit, filter=filter_scoped)

            # Hits are converted while the searcher is still open.
            return [self.parse_result(hit) for hit in results]

    def parse_result(self, result):
        """Convert a single search hit into a plain result dict.

        Args:
            result: A hit yielded by ``searcher.search``.

        Returns:
            frappe._dict: Stored ``title``, ``path`` and ``keywords`` plus
            the highlighted snippets for title, content and keywords.
        """
        return frappe._dict(
            title=result["title"],
            path=result["path"],
            keywords=result["keywords"],
            title_highlights=result.highlights("title"),
            content_highlights=result.highlights("content"),
            keyword_highlights=result.highlights("keywords"),
        )
114
Ankush Menat494bd9e2022-03-28 18:52:46 +0530115
def get_all_published_items():
    """Return the ``item_code`` of every published Website Item whose
    ``variant_of`` is empty."""
    published_filters = {"variant_of": "", "published": 1}
    return frappe.get_all("Website Item", filters=published_filters, pluck="item_code")
120
Shivam Mishra7d3876d2020-11-19 12:05:56 +0530121
122def update_index_for_path(path):
123 search = ProductSearch(INDEX_NAME)
124 return search.update_index_by_name(path)
125
Ankush Menat494bd9e2022-03-28 18:52:46 +0530126
def remove_document_from_index(path):
    """Remove the product index entry for ``path``.

    ``path`` is forwarded unchanged to
    ``ProductSearch.remove_document_from_index``.
    """
    return ProductSearch(INDEX_NAME).remove_document_from_index(path)
130
Ankush Menat494bd9e2022-03-28 18:52:46 +0530131
def build_index_for_all_routes():
    """Build the product search index by calling ``ProductSearch.build``."""
    return ProductSearch(INDEX_NAME).build()