| import frappe |
| from frappe.search.full_text_search import FullTextSearch |
| from frappe.utils import strip_html_tags |
| from whoosh.analysis import StemmingAnalyzer |
| from whoosh.fields import ID, KEYWORD, TEXT, Schema |
| from whoosh.qparser import FieldsPlugin, MultifieldParser, WildcardPlugin |
| from whoosh.query import Prefix |
| |
# Name of the search index that the module-level helpers in this file pass
# to ProductSearch.
# TODO: Make obsolete
INDEX_NAME = "products"
| |
| |
class ProductSearch(FullTextSearch):
    """Full-text search index over items, built on ``FullTextSearch``.

    Every indexed document carries the item's title, name, route, a
    plain-text description and a comma-separated keyword list (see
    ``get_schema``).
    """

    def get_schema(self):
        """Return the Whoosh schema describing one indexed item.

        ``title`` is boosted so that title matches score higher than matches
        in the other fields, and ``content`` is stemmed.
        """
        return Schema(
            title=TEXT(stored=True, field_boost=1.5),
            name=ID(stored=True),
            path=ID(stored=True),
            content=TEXT(stored=True, analyzer=StemmingAnalyzer()),
            keywords=KEYWORD(stored=True, scorable=True, commas=True),
        )

    def get_id(self):
        """Return ``"name"``, the schema field used as the document key.

        NOTE(review): the base class presumably stores this value as
        ``self.id`` (which ``search`` reads) -- confirm against FullTextSearch.
        """
        return "name"

    def get_items_to_index(self):
        """Build index documents for every published item.

        Item codes come from ``get_all_published_items``. Items whose
        document could not be built (``get_document_to_index`` returned
        ``None``) are left out of the result.

        Returns:
            list[frappe._dict]: Documents ready to be written to the index.
        """
        documents = (
            self.get_document_to_index(item) for item in get_all_published_items()
        )
        return [document for document in documents if document]

    def get_document_to_index(self, item):
        """Build the index document for a single item.

        Args:
            item (str): Name of the ``Item`` document to index.

        Returns:
            frappe._dict | None: Field values matching ``get_schema``, or
            ``None`` when the item could not be loaded or converted.
        """
        try:
            doc = frappe.get_doc("Item", item)

            keywords = [doc.item_group, doc.brand, doc.website_image_alt]
            if doc.has_variants and doc.variant_based_on == "Item Attribute":
                keywords.extend(attr.attribute for attr in doc.attributes)

            # Prefer the long web description and fall back to the plain one.
            # Defaulting to "" keeps items without any description indexable
            # instead of failing on an unbound ``content``.
            description = doc.web_long_description or doc.description
            content = strip_html_tags(description) if description else ""

            return frappe._dict(
                title=doc.item_name,
                name=doc.name,
                path=doc.route,
                content=content,
                # Unset values are skipped so the join never hits ``None``.
                keywords=", ".join(keyword for keyword in keywords if keyword),
            )
        except Exception:
            # Indexing is best effort: one broken item must not abort the
            # whole run, but the failure is recorded instead of vanishing.
            frappe.log_error(
                title="Product search indexing failed",
                message=f"Item: {item}\n{frappe.get_traceback()}",
            )
            return None

    def search(self, text, scope=None, limit=20):
        """Search the current index.

        Args:
            text (str): String to search for in the title, content and
                keywords fields.
            scope (str, optional): When given, only documents whose id field
                starts with this prefix are returned. Defaults to None.
            limit (int, optional): Maximum number of results. Defaults to 20.

        Returns:
            list[frappe._dict]: Search results as built by ``parse_result``.
        """
        ix = self.get_index()

        with ix.searcher() as searcher:
            parser = MultifieldParser(["title", "content", "keywords"], ix.schema)
            # Treat the input as plain terms: no ``field:value`` syntax and
            # no wildcard expansion.
            parser.remove_plugin_class(FieldsPlugin)
            parser.remove_plugin_class(WildcardPlugin)
            query = parser.parse(text)

            scope_filter = Prefix(self.id, scope) if scope else None
            hits = searcher.search(query, limit=limit, filter=scope_filter)

            # Hits are converted while the searcher is still open.
            return [self.parse_result(hit) for hit in hits]

    def parse_result(self, result):
        """Convert one search hit into a plain result dict.

        Args:
            result: A hit yielded by the searcher's result set.

        Returns:
            frappe._dict: Stored ``title``, ``path`` and ``keywords`` plus the
            highlighted excerpt for each of the three searched fields.
        """
        return frappe._dict(
            title=result["title"],
            path=result["path"],
            keywords=result["keywords"],
            title_highlights=result.highlights("title"),
            content_highlights=result.highlights("content"),
            keyword_highlights=result.highlights("keywords"),
        )
| |
| |
def get_all_published_items():
    """Return the ``item_code`` of each published, non-variant ``Website Item``.

    "Non-variant" here means the ``variant_of`` field is empty.
    """
    published_templates = {"variant_of": "", "published": 1}
    return frappe.get_all(
        "Website Item",
        filters=published_templates,
        pluck="item_code",
    )
| |
| |
def update_index_for_path(path):
    """Refresh a single entry of the product index.

    NOTE(review): despite the parameter name, the value is handed to
    ``update_index_by_name``, so it is presumably a document name rather
    than a route -- confirm against callers.
    """
    return ProductSearch(INDEX_NAME).update_index_by_name(path)
| |
| |
def remove_document_from_index(path):
    """Drop a single entry from the product index.

    Delegates to the ``remove_document_from_index`` method of a
    ``ProductSearch`` opened on ``INDEX_NAME`` and returns its result.
    """
    return ProductSearch(INDEX_NAME).remove_document_from_index(path)
| |
| |
def build_index_for_all_routes():
    """Build the product index and return whatever ``build`` returns."""
    return ProductSearch(INDEX_NAME).build()