Shivam Mishra | 7d3876d | 2020-11-19 12:05:56 +0530 | [diff] [blame] | 1 | import frappe |
| 2 | from frappe.search.full_text_search import FullTextSearch |
Shivam Mishra | 7d3876d | 2020-11-19 12:05:56 +0530 | [diff] [blame] | 3 | from frappe.utils import strip_html_tags |
Shivam Mishra | 7d3876d | 2020-11-19 12:05:56 +0530 | [diff] [blame] | 4 | from whoosh.analysis import StemmingAnalyzer |
Chillar Anand | 915b343 | 2021-09-02 16:44:59 +0530 | [diff] [blame] | 5 | from whoosh.fields import ID, KEYWORD, TEXT, Schema |
| 6 | from whoosh.qparser import FieldsPlugin, MultifieldParser, WildcardPlugin |
Shivam Mishra | 7d3876d | 2020-11-19 12:05:56 +0530 | [diff] [blame] | 7 | from whoosh.query import Prefix |
| 8 | |
# TODO: Make obsolete
# Index name that the module-level helpers in this file pass to ProductSearch.
INDEX_NAME = "products"
| 11 | |
Ankush Menat | 494bd9e | 2022-03-28 18:52:46 +0530 | [diff] [blame] | 12 | |
class ProductSearch(FullTextSearch):
    """Full-text search over published website items.

    Supplies the product schema, the documents to index and the query
    logic on top of the generic ``FullTextSearch`` base class.
    """

    def get_schema(self):
        """Return the Whoosh schema of the product index.

        ``title`` is boosted over the other fields, ``content`` is stemmed
        and ``keywords`` holds a comma separated list of terms.
        """
        return Schema(
            title=TEXT(stored=True, field_boost=1.5),
            name=ID(stored=True),
            path=ID(stored=True),
            content=TEXT(stored=True, analyzer=StemmingAnalyzer()),
            keywords=KEYWORD(stored=True, scorable=True, commas=True),
        )

    def get_id(self):
        """Return the name of the schema field that identifies a document."""
        return "name"

    def get_items_to_index(self):
        """Build the index documents for all published items.

        Items whose document could not be built (``get_document_to_index``
        returned ``None``) are left out, so the result never contains
        ``None`` entries.

        Returns:
                list(_dict): One document per successfully processed item
        """
        documents = (
            self.get_document_to_index(item) for item in get_all_published_items()
        )
        return [document for document in documents if document is not None]

    def get_document_to_index(self, item):
        """Build the index document for a single item.

        Indexing is best-effort: any failure is written to the error log and
        ``None`` is returned instead of raising, so one broken item does not
        abort the whole indexing run.

        Args:
                item (str): Name of the ``Item`` document to index

        Returns:
                _dict | None: Document with ``title``, ``name``, ``path``,
                ``content`` and ``keywords``, or ``None`` on failure
        """
        try:
            doc = frappe.get_doc("Item", item)

            keywords = [doc.item_group, doc.brand, doc.website_image_alt]
            if doc.has_variants and doc.variant_based_on == "Item Attribute":
                keywords += [attr.attribute for attr in doc.attributes]

            # Prefer the long web description and fall back to the plain one.
            # Items with neither are indexed with empty content; previously
            # `content` stayed unbound and the item was silently dropped.
            description = doc.web_long_description or doc.description
            content = strip_html_tags(description) if description else ""

            return frappe._dict(
                title=doc.item_name,
                name=doc.name,
                path=doc.route,
                content=content,
                # Skip unset values so the join cannot fail on None.
                keywords=", ".join(keyword for keyword in keywords if keyword),
            )
        except Exception:
            # Stay best-effort, but leave a trace instead of hiding the error.
            frappe.log_error(
                title="Product search indexing failed",
                message="Item: {0}\n{1}".format(item, frappe.get_traceback()),
            )
            return None

    def search(self, text, scope=None, limit=20):
        """Search from the current index

        Args:
                text (str): String to search for
                scope (str, optional): Scope to limit the search. Defaults to None.
                limit (int, optional): Limit number of search results. Defaults to 20.

        Returns:
                [List(_dict)]: Search results
        """
        ix = self.get_index()

        with ix.searcher() as searcher:
            parser = MultifieldParser(["title", "content", "keywords"], ix.schema)
            # Drop the field and wildcard plugins from the parser so the
            # search text is not interpreted with that query syntax.
            parser.remove_plugin_class(FieldsPlugin)
            parser.remove_plugin_class(WildcardPlugin)
            query = parser.parse(text)

            # With a scope, only documents whose id field starts with it match.
            # NOTE(review): `self.id` is presumably set by FullTextSearch from
            # get_id() - confirm against the base class.
            filter_scoped = Prefix(self.id, scope) if scope else None
            results = searcher.search(query, limit=limit, filter=filter_scoped)

            # Convert the hits before leaving the `with` block, i.e. while
            # the searcher is still open.
            return [self.parse_result(result) for result in results]

    def parse_result(self, result):
        """Convert a search hit into a ``_dict`` of stored fields and highlights.

        Args:
                result: A single hit from the searcher's result set

        Returns:
                _dict: ``title``, ``path`` and ``keywords`` of the hit plus the
                highlighted snippets for the title, content and keywords fields
        """
        title_highlights = result.highlights("title")
        content_highlights = result.highlights("content")
        keyword_highlights = result.highlights("keywords")

        return frappe._dict(
            title=result["title"],
            path=result["path"],
            keywords=result["keywords"],
            title_highlights=title_highlights,
            content_highlights=content_highlights,
            keyword_highlights=keyword_highlights,
        )
| 114 | |
Ankush Menat | 494bd9e | 2022-03-28 18:52:46 +0530 | [diff] [blame] | 115 | |
def get_all_published_items():
    """Fetch the `item_code` of every published Website Item with an empty `variant_of`."""
    published_filters = {"variant_of": "", "published": 1}
    return frappe.get_all("Website Item", filters=published_filters, pluck="item_code")
| 120 | |
Shivam Mishra | 7d3876d | 2020-11-19 12:05:56 +0530 | [diff] [blame] | 121 | |
def update_index_for_path(path):
    """Update the product index entry for `path` via `ProductSearch.update_index_by_name`."""
    return ProductSearch(INDEX_NAME).update_index_by_name(path)
| 125 | |
Ankush Menat | 494bd9e | 2022-03-28 18:52:46 +0530 | [diff] [blame] | 126 | |
def remove_document_from_index(path):
    """Remove `path` from the product index via `ProductSearch.remove_document_from_index`."""
    return ProductSearch(INDEX_NAME).remove_document_from_index(path)
| 130 | |
Ankush Menat | 494bd9e | 2022-03-28 18:52:46 +0530 | [diff] [blame] | 131 | |
def build_index_for_all_routes():
    """Build the product index by calling `build()` on a `ProductSearch` for `INDEX_NAME`."""
    return ProductSearch(INDEX_NAME).build()