blob: 752c33e92ee5d8c10a11717425d4cae10319d983 [file] [log] [blame]
Shivam Mishra7d3876d2020-11-19 12:05:56 +05301import frappe
2from frappe.search.full_text_search import FullTextSearch
Shivam Mishra7d3876d2020-11-19 12:05:56 +05303from frappe.utils import strip_html_tags
Shivam Mishra7d3876d2020-11-19 12:05:56 +05304from whoosh.analysis import StemmingAnalyzer
Chillar Anand915b3432021-09-02 16:44:59 +05305from whoosh.fields import ID, KEYWORD, TEXT, Schema
6from whoosh.qparser import FieldsPlugin, MultifieldParser, WildcardPlugin
Shivam Mishra7d3876d2020-11-19 12:05:56 +05307from whoosh.query import Prefix
8
# TODO: Make obsolete
# Name of the search index used by every ProductSearch instance created in
# this module.
INDEX_NAME = "products"
11
class ProductSearch(FullTextSearch):
    """Full-text search over published website items.

    Supplies the schema, the id field and the per-item documents on top of
    ``FullTextSearch``; the index handling used by this module
    (``get_index``, ``build``, ``update_index_by_name``, ...) is inherited.
    """

    def get_schema(self):
        """Return the Whoosh schema describing one indexed item.

        Returns:
            Schema: ``title`` (boosted by 1.5), ``name`` and ``path`` as IDs,
            stemmed ``content`` and comma-separated, scorable ``keywords``.
            Every field is stored so it can be read back from a hit.
        """
        return Schema(
            title=TEXT(stored=True, field_boost=1.5),
            name=ID(stored=True),
            path=ID(stored=True),
            content=TEXT(stored=True, analyzer=StemmingAnalyzer()),
            keywords=KEYWORD(stored=True, scorable=True, commas=True),
        )

    def get_id(self):
        """Return the name of the schema field that identifies a document."""
        return "name"

    def get_items_to_index(self):
        """Build the index documents for all published items.

        Returns:
            list[frappe._dict]: One document per published item. Items whose
            document could not be built are left out instead of being
            returned as ``None`` entries.
        """
        documents = (
            self.get_document_to_index(item) for item in get_all_published_items()
        )
        return [document for document in documents if document]

    def get_document_to_index(self, item):
        """Build the index document for a single Item.

        Args:
            item (str): Name of the ``Item`` document to index.

        Returns:
            frappe._dict | None: Document with ``title``, ``name``, ``path``,
            ``content`` and ``keywords``, or ``None`` when the item could not
            be loaded or converted (the failure is logged).
        """
        try:
            doc = frappe.get_doc("Item", item)

            keywords = [doc.item_group]
            if doc.brand:
                keywords.append(doc.brand)
            if doc.website_image_alt:
                keywords.append(doc.website_image_alt)
            if doc.has_variants and doc.variant_based_on == "Item Attribute":
                keywords.extend(attr.attribute for attr in doc.attributes)

            # Prefer the long web description and fall back to the plain one.
            # Both may be empty, in which case the item is still indexed by
            # title and keywords instead of failing on an unbound variable.
            description = doc.web_long_description or doc.description
            content = strip_html_tags(description) if description else ""

            return frappe._dict(
                title=doc.item_name,
                name=doc.name,
                path=doc.route,
                content=content,
                # Empty values would make ``join`` raise; skip them.
                keywords=", ".join(keyword for keyword in keywords if keyword),
            )
        except Exception:
            # Indexing is best-effort: one broken item must not abort the
            # whole run, but the failure should not vanish silently either.
            frappe.log_error(
                message=frappe.get_traceback(),
                title="Product search indexing failed",
            )
            return None

    def search(self, text, scope=None, limit=20):
        """Search from the current index

        Args:
            text (str): String to search for
            scope (str, optional): Scope to limit the search. Defaults to None.
            limit (int, optional): Limit number of search results. Defaults to 20.

        Returns:
            [List(_dict)]: Search results
        """
        ix = self.get_index()

        with ix.searcher() as searcher:
            parser = MultifieldParser(["title", "content", "keywords"], ix.schema)
            # Treat the input as plain text: no ``field:value`` syntax and no
            # wildcard expansion.
            parser.remove_plugin_class(FieldsPlugin)
            parser.remove_plugin_class(WildcardPlugin)
            query = parser.parse(text)

            # ``self.id`` is not set in this class; presumably the base class
            # derives it from ``get_id()`` - verify against FullTextSearch.
            filter_scoped = Prefix(self.id, scope) if scope else None
            results = searcher.search(query, limit=limit, filter=filter_scoped)

            # Hits are converted while the searcher is still open.
            return [self.parse_result(hit) for hit in results]

    def parse_result(self, result):
        """Convert a single search hit into a plain result dict.

        Args:
            result: Search hit exposing stored fields by key and a
                ``highlights(fieldname)`` method.

        Returns:
            frappe._dict: Stored ``title``, ``path`` and ``keywords`` plus the
            highlighted excerpts for title, content and keywords.
        """
        return frappe._dict(
            title=result["title"],
            path=result["path"],
            keywords=result["keywords"],
            title_highlights=result.highlights("title"),
            content_highlights=result.highlights("content"),
            keyword_highlights=result.highlights("keywords"),
        )
113
def get_all_published_items():
    """Return the item codes of all published Website Items.

    Only records with an empty ``variant_of`` and ``published`` set to 1 are
    selected.
    """
    published_filters = {"variant_of": "", "published": 1}
    return frappe.get_all(
        "Website Item",
        filters=published_filters,
        pluck="item_code",
    )
Shivam Mishra7d3876d2020-11-19 12:05:56 +0530116
def update_index_for_path(path):
    """Update the ``INDEX_NAME`` index entry for ``path``.

    ``path`` is passed unchanged to ``ProductSearch.update_index_by_name``,
    whose result is returned.
    """
    return ProductSearch(INDEX_NAME).update_index_by_name(path)
120
def remove_document_from_index(path):
    """Remove the document for ``path`` from the ``INDEX_NAME`` index.

    Delegates to ``ProductSearch.remove_document_from_index`` and returns
    its result.
    """
    return ProductSearch(INDEX_NAME).remove_document_from_index(path)
124
def build_index_for_all_routes():
    """Build the ``INDEX_NAME`` index via ``ProductSearch.build``.

    Returns whatever ``build`` returns.
    """
    return ProductSearch(INDEX_NAME).build()