feat(tally): Preprocess master data to generate COA
diff --git a/erpnext/erpnext_integrations/doctype/tally_migration/tally_migration.js b/erpnext/erpnext_integrations/doctype/tally_migration/tally_migration.js
index 065fcc7..4652d60 100644
--- a/erpnext/erpnext_integrations/doctype/tally_migration/tally_migration.js
+++ b/erpnext/erpnext_integrations/doctype/tally_migration/tally_migration.js
@@ -3,6 +3,35 @@
 
 frappe.ui.form.on('Tally Migration', {
 	refresh: function(frm) {
-
-	}
+		if (frm.doc.master_data && frm.doc.day_book) {
+			frm.disable_save(); // saving is disabled once both files are attached
+			if (!(frm.doc.status || "").endsWith("In Progress")) { // "Preprocessing In Progress" / "Import In Progress"
+				frm.page.set_primary_action("Preprocess", () => frm.trigger("preprocess"));
+			}
+		} else {
+			frm.set_value("status", "Attach File");
+		}
+		if (frm.doc.tally_company && frm.doc.erpnext_company) {
+			frm.set_df_property("company_section", "hidden", 0); // unhide the company section
+			frm.page.set_primary_action("Start Import", () => frm.trigger("start_import"));
+		}
+	},
+	preprocess: function(frm) {
+		// Call the document's "preprocess" method on the server with the `freeze` option set.
+		const options = { doc: frm.doc, method: "preprocess", freeze: true };
+		const request = frm.call(options);
+		// Once the call resolves, mark the form as being preprocessed.
+		request.then(() => {
+			frm.set_value("status", "Preprocessing In Progress");
+		});
+	},
+	start_import: function(frm) {
+		// Call the document's "start_import" method on the server with the `freeze` option set.
+		const options = { doc: frm.doc, method: "start_import", freeze: true };
+		const request = frm.call(options);
+		// Once the call resolves, mark the form as being imported.
+		request.then(() => {
+			frm.set_value("status", "Import In Progress");
+		});
+	},
 });
diff --git a/erpnext/erpnext_integrations/doctype/tally_migration/tally_migration.json b/erpnext/erpnext_integrations/doctype/tally_migration/tally_migration.json
index b54e6be..7d51850 100644
--- a/erpnext/erpnext_integrations/doctype/tally_migration/tally_migration.json
+++ b/erpnext/erpnext_integrations/doctype/tally_migration/tally_migration.json
@@ -21,6 +21,38 @@
    "bold": 0,
    "collapsible": 0,
    "columns": 0,
+   "fieldname": "status",
+   "fieldtype": "Data",
+   "hidden": 1,
+   "ignore_user_permissions": 0,
+   "ignore_xss_filter": 0,
+   "in_filter": 0,
+   "in_global_search": 0,
+   "in_list_view": 0,
+   "in_standard_filter": 0,
+   "label": "Status",
+   "length": 0,
+   "no_copy": 0,
+   "permlevel": 0,
+   "precision": "",
+   "print_hide": 0,
+   "print_hide_if_no_value": 0,
+   "read_only": 0,
+   "remember_last_selected_value": 0,
+   "report_hide": 0,
+   "reqd": 0,
+   "search_index": 0,
+   "set_only_once": 0,
+   "translatable": 0,
+   "unique": 0
+  },
+  {
+   "allow_bulk_edit": 0,
+   "allow_in_quick_entry": 0,
+   "allow_on_submit": 0,
+   "bold": 0,
+   "collapsible": 0,
+   "columns": 0,
    "fieldname": "master_data",
    "fieldtype": "Attach",
    "hidden": 0,
@@ -53,6 +85,40 @@
    "bold": 0,
    "collapsible": 0,
    "columns": 0,
+   "default": "Sundry Creditors",
+   "fieldname": "tally_creditors_account",
+   "fieldtype": "Data",
+   "hidden": 0,
+   "ignore_user_permissions": 0,
+   "ignore_xss_filter": 0,
+   "in_filter": 0,
+   "in_global_search": 0,
+   "in_list_view": 0,
+   "in_standard_filter": 0,
+   "label": "Tally Creditors Account",
+   "length": 0,
+   "no_copy": 0,
+   "options": "",
+   "permlevel": 0,
+   "precision": "",
+   "print_hide": 0,
+   "print_hide_if_no_value": 0,
+   "read_only": 0,
+   "remember_last_selected_value": 0,
+   "report_hide": 0,
+   "reqd": 1,
+   "search_index": 0,
+   "set_only_once": 0,
+   "translatable": 0,
+   "unique": 0
+  },
+  {
+   "allow_bulk_edit": 0,
+   "allow_in_quick_entry": 0,
+   "allow_on_submit": 0,
+   "bold": 0,
+   "collapsible": 0,
+   "columns": 0,
    "fieldname": "column_break_2",
    "fieldtype": "Column Break",
    "hidden": 0,
@@ -108,6 +174,168 @@
    "set_only_once": 0,
    "translatable": 0,
    "unique": 0
+  },
+  {
+   "allow_bulk_edit": 0,
+   "allow_in_quick_entry": 0,
+   "allow_on_submit": 0,
+   "bold": 0,
+   "collapsible": 0,
+   "columns": 0,
+   "default": "Sundry Debtors",
+   "fieldname": "tally_debtors_account",
+   "fieldtype": "Data",
+   "hidden": 0,
+   "ignore_user_permissions": 0,
+   "ignore_xss_filter": 0,
+   "in_filter": 0,
+   "in_global_search": 0,
+   "in_list_view": 0,
+   "in_standard_filter": 0,
+   "label": "Tally Debtors Account",
+   "length": 0,
+   "no_copy": 0,
+   "permlevel": 0,
+   "precision": "",
+   "print_hide": 0,
+   "print_hide_if_no_value": 0,
+   "read_only": 0,
+   "remember_last_selected_value": 0,
+   "report_hide": 0,
+   "reqd": 1,
+   "search_index": 0,
+   "set_only_once": 0,
+   "translatable": 0,
+   "unique": 0
+  },
+  {
+   "allow_bulk_edit": 0,
+   "allow_in_quick_entry": 0,
+   "allow_on_submit": 0,
+   "bold": 0,
+   "collapsible": 0,
+   "columns": 0,
+   "fieldname": "company_section",
+   "fieldtype": "Section Break",
+   "hidden": 1,
+   "ignore_user_permissions": 0,
+   "ignore_xss_filter": 0,
+   "in_filter": 0,
+   "in_global_search": 0,
+   "in_list_view": 0,
+   "in_standard_filter": 0,
+   "label": "Company Section",
+   "length": 0,
+   "no_copy": 0,
+   "permlevel": 0,
+   "precision": "",
+   "print_hide": 0,
+   "print_hide_if_no_value": 0,
+   "read_only": 0,
+   "remember_last_selected_value": 0,
+   "report_hide": 0,
+   "reqd": 0,
+   "search_index": 0,
+   "set_only_once": 0,
+   "translatable": 0,
+   "unique": 0
+  },
+  {
+   "allow_bulk_edit": 0,
+   "allow_in_quick_entry": 0,
+   "allow_on_submit": 0,
+   "bold": 0,
+   "collapsible": 0,
+   "columns": 0,
+   "fieldname": "tally_company",
+   "fieldtype": "Data",
+   "hidden": 0,
+   "ignore_user_permissions": 0,
+   "ignore_xss_filter": 0,
+   "in_filter": 0,
+   "in_global_search": 0,
+   "in_list_view": 0,
+   "in_standard_filter": 0,
+   "label": "Tally Company",
+   "length": 0,
+   "no_copy": 0,
+   "options": "",
+   "permlevel": 0,
+   "precision": "",
+   "print_hide": 0,
+   "print_hide_if_no_value": 0,
+   "read_only": 1,
+   "remember_last_selected_value": 0,
+   "report_hide": 0,
+   "reqd": 0,
+   "search_index": 0,
+   "set_only_once": 0,
+   "translatable": 0,
+   "unique": 0
+  },
+  {
+   "allow_bulk_edit": 0,
+   "allow_in_quick_entry": 0,
+   "allow_on_submit": 0,
+   "bold": 0,
+   "collapsible": 0,
+   "columns": 0,
+   "fieldname": "column_break_8",
+   "fieldtype": "Column Break",
+   "hidden": 0,
+   "ignore_user_permissions": 0,
+   "ignore_xss_filter": 0,
+   "in_filter": 0,
+   "in_global_search": 0,
+   "in_list_view": 0,
+   "in_standard_filter": 0,
+   "length": 0,
+   "no_copy": 0,
+   "permlevel": 0,
+   "precision": "",
+   "print_hide": 0,
+   "print_hide_if_no_value": 0,
+   "read_only": 0,
+   "remember_last_selected_value": 0,
+   "report_hide": 0,
+   "reqd": 0,
+   "search_index": 0,
+   "set_only_once": 0,
+   "translatable": 0,
+   "unique": 0
+  },
+  {
+   "allow_bulk_edit": 0,
+   "allow_in_quick_entry": 0,
+   "allow_on_submit": 0,
+   "bold": 0,
+   "collapsible": 0,
+   "columns": 0,
+   "fieldname": "erpnext_company",
+   "fieldtype": "Data",
+   "hidden": 0,
+   "ignore_user_permissions": 0,
+   "ignore_xss_filter": 0,
+   "in_filter": 0,
+   "in_global_search": 0,
+   "in_list_view": 0,
+   "in_standard_filter": 0,
+   "label": "ERPNext Company",
+   "length": 0,
+   "no_copy": 0,
+   "options": "",
+   "permlevel": 0,
+   "precision": "",
+   "print_hide": 0,
+   "print_hide_if_no_value": 0,
+   "read_only": 0,
+   "remember_last_selected_value": 0,
+   "report_hide": 0,
+   "reqd": 0,
+   "search_index": 0,
+   "set_only_once": 0,
+   "translatable": 0,
+   "unique": 0
   }
  ],
  "has_web_view": 0,
@@ -120,7 +348,7 @@
  "issingle": 0,
  "istable": 0,
  "max_attachments": 0,
- "modified": "2019-02-20 12:59:17.746113",
+ "modified": "2019-03-01 15:02:47.992385",
  "modified_by": "Administrator",
  "module": "ERPNext Integrations",
  "name": "Tally Migration",
diff --git a/erpnext/erpnext_integrations/doctype/tally_migration/tally_migration.py b/erpnext/erpnext_integrations/doctype/tally_migration/tally_migration.py
index f4e064a..4c27d74 100644
--- a/erpnext/erpnext_integrations/doctype/tally_migration/tally_migration.py
+++ b/erpnext/erpnext_integrations/doctype/tally_migration/tally_migration.py
@@ -4,9 +4,129 @@
 
 from __future__ import unicode_literals
 
+import json
+import re
+import zipfile
 import frappe
 from frappe.model.document import Document
+from bs4 import BeautifulSoup as bs
 
+PRIMARY_ACCOUNT = "Primary"
 
 class TallyMigration(Document):
-	pass
+	def _preprocess(self):  # queued through frappe.enqueue_doc by preprocess()
+		company, chart_of_accounts_tree, customers, suppliers = self._process_master_data()  # NOTE(review): only `company` is used below
+		self.tally_company = company
+		self.erpnext_company = company  # starts out equal to the Tally company name
+		self.status = "Preprocessed"
+		self.save()
+
+	def _process_master_data(self):
+		def get_master_collection(master_data):  # master_data: file_url used to look up the File document
+			master_file = frappe.get_doc("File", {"file_url": master_data})
+
+			with zipfile.ZipFile(master_file.get_full_path()) as zf:
+				content = zf.read(zf.namelist()[0]).decode("utf-16")  # only the first archive member is read
+
+			master = bs(sanitize(emptify(content)), "xml")
+			collection = master.BODY.IMPORTDATA.REQUESTDATA  # later searched for GROUP / LEDGER / REMOTECMPINFO.LIST tags
+			return collection
+
+		def get_company_name(collection):  # REMOTECMPNAME of the first REMOTECMPINFO.LIST entry
+			return collection.find_all("REMOTECMPINFO.LIST")[0].REMOTECMPNAME.string
+
+		def get_coa_customers_suppliers(collection):
+			# Returns (chart-of-accounts tree, customer names, supplier names) for the collection.
+			root_type_map = {
+				"Application of Funds (Assets)": "Asset",
+				"Expenses": "Expense",
+				"Income": "Income",
+				"Source of Funds (Liabilities)": "Liability"
+			}
+			roots = set(root_type_map)
+			accounts = list(get_groups(collection.find_all("GROUP"))) + list(get_ledgers(collection.find_all("LEDGER")))
+			children, parents = get_children_and_parent_dict(accounts)
+			# A real set: it is only used for membership tests while building the tree.
+			group_set = {name for _parent, name, is_group in accounts if is_group}
+			children, customers, suppliers = remove_parties(parents, children, group_set)
+			coa = traverse({}, children, roots, roots, group_set)
+			for account in coa:
+				coa[account]["root_type"] = root_type_map[account]
+			return coa, customers, suppliers
+
+		def get_groups(accounts):
+			# Yields (parent, name, is_group); the creditors/debtors groups get is_group 0, all others 1.
+			party_accounts = (self.tally_creditors_account, self.tally_debtors_account)
+			for account in accounts:
+				is_group = 0 if account["NAME"] in party_accounts else 1
+				yield get_parent(account), account["NAME"], is_group
+
+		def get_ledgers(accounts):
+			# Yield (parent, name, 0) for every ledger that has a PARENT tag.
+			# Ledgers without one (for example "Profit & Loss A/c") get no Account.
+			for ledger in accounts:
+				if ledger.PARENT:
+					yield ledger.PARENT.string, ledger["NAME"], 0
+
+		def get_parent(account):
+			# Without a PARENT tag the root account is chosen from (ISDEEMEDPOSITIVE, ISREVENUE).
+			if not account.PARENT:
+				flags = (account.ISDEEMEDPOSITIVE.string, account.ISREVENUE.string)
+				return {
+					("Yes", "No"): "Application of Funds (Assets)", ("Yes", "Yes"): "Expenses",
+					("No", "Yes"): "Income", ("No", "No"): "Source of Funds (Liabilities)",
+				}[flags]
+			return account.PARENT.string
+
+		def get_children_and_parent_dict(accounts):
+			child_map, parent_map = {}, {}  # parent -> {children}, child -> {parents}
+			for parent_name, account_name, _is_group in accounts:
+				child_map.setdefault(parent_name, set()).add(account_name)
+				parent_map.setdefault(account_name, set()).add(parent_name)
+			return child_map, parent_map
+
+		def remove_parties(parents, children, group_set):
+			# Collect party names. Accounts under the creditors account are suppliers
+			# (we owe them); accounts under the debtors account are customers (they
+			# owe us). Returns (children, customers, suppliers).
+			customers, suppliers = set(), set()
+			for account, account_parents in parents.items():
+				is_supplier = self.tally_creditors_account in account_parents
+				if is_supplier or self.tally_debtors_account in account_parents:
+					children.pop(account, None)  # drop this account's own child list
+					if account not in group_set:
+						(suppliers if is_supplier else customers).add(account)
+			return children, customers, suppliers
+
+		def traverse(tree, children, accounts, roots, group_set):
+			# Nest accounts recursively: childless groups/roots are flagged, other accounts map to {}.
+			for account in accounts:
+				if account not in group_set and account not in roots:
+					tree[account] = {}
+				elif account in children:
+					tree[account] = traverse({}, children, children[account], roots, group_set)
+				else:
+					tree[account] = {"is_group": 1}
+			return tree
+
+		collection = get_master_collection(self.master_data)
+
+		company = get_company_name(collection)
+		chart_of_accounts_tree, customer_names, supplier_names = get_coa_customers_suppliers(collection)
+
+		return company, chart_of_accounts_tree, customer_names, supplier_names
+
+	def preprocess(self):  # queues this document's _preprocess method via frappe.enqueue_doc
+		frappe.enqueue_doc(self.doctype, self.name, "_preprocess")
+
+	def start_import(self):  # placeholder: the import itself is not implemented yet
+		pass
+
+def sanitize(string):
+	return re.sub("", "", string)  # NOTE(review): the pattern reads as empty here, which makes this a no-op; confirm the intended pattern
+
+def emptify(string):
+	# Strip self-closing tags, then empty open/close tag pairs, then CRLF line breaks.
+	for pattern in (r"<\w+/>", r"<([\w.]+)>\s*<\/\1>", r"\r\n"):
+		string = re.sub(pattern, "", string)
+	return string