]> wagnertech.de Git - mDoc.git/blob - python/mDoc/mdoc/extractor.py
1b9b7be705d68685453d7cb34e0f21c493fca460
[mDoc.git] / python / mDoc / mdoc / extractor.py
1 '''
2 Created on 26.03.2021
3
4 @author: antix19
5 '''
6 from mutil.XmlExtractor import XmlExtractor
7 from mutil import XmlExtractor as XMLE
8 from pickle import NONE, FALSE, TRUE
9
def eval_class(classs, item):
    """Check whether the dotted path ``item`` is a leading part of ``classs``.

    Both arguments are split on "." and compared component by component.

    Returns a tuple ``(matches, depth)``. ``matches`` is True when every
    component of ``item`` equals the component of ``classs`` at the same
    position. ``depth`` is the number of components in ``item`` and is
    returned whether or not the paths match.
    """
    wanted = item.split(".")
    depth = len(wanted)
    actual = classs.split(".")
    # When ``classs`` has fewer components than ``item`` the slice is shorter
    # than ``wanted``, so the comparison is False without a length check.
    matches = actual[:depth] == wanted
    return matches, depth
20     
def is_for_print(classs, attrs):
    """Decide whether an element with ``attrs`` is printed for ``classs``.

    ``attrs`` may carry an "include" and/or an "exclude" entry; each one is
    matched against ``classs`` with ``eval_class``.

    * neither entry present: print.
    * only "include": print when it matches.
    * only "exclude": print when it does not match.
    * both present: the entry with more path components is checked first.
      If it matches it decides the result; otherwise the other entry does.
      When both entries have the same number of components the element is
      printed unconditionally.

    Returns True when the element should be printed, False otherwise.
    """
    inc_match, inc_depth = False, 0
    exc_match, exc_depth = False, 0
    if "include" in attrs:
        inc_match, inc_depth = eval_class(classs, attrs["include"])
    if "exclude" in attrs:
        exc_match, exc_depth = eval_class(classs, attrs["exclude"])

    # A depth of 0 means the corresponding entry was not given.
    has_include = inc_depth > 0
    has_exclude = exc_depth > 0

    if not has_include and not has_exclude:
        return True
    if not has_exclude:
        return inc_match
    if not has_include:
        return not exc_match

    # Both filters are present from here on.
    if inc_depth > exc_depth:
        # The deeper include wins when it matches; otherwise fall back to
        # the exclude filter.
        return inc_match or not exc_match
    if exc_depth > inc_depth:
        # The deeper exclude wins when it matches; otherwise fall back to
        # the include filter.
        return (not exc_match) and inc_match
    # Same depth on both sides: no filter takes precedence.
    return True
46     
47     
def extract(file, classs, target):
    """Write the text elements of ``file`` selected for ``classs`` to ``target``.

    ``file`` is opened through ``XmlExtractor``. The children of its
    "document" element are read one by one; each must be a "text" element.
    The value of every element accepted by ``is_for_print`` is written to
    ``target`` followed by a newline.

    Parameters:
        file:   input passed to ``XmlExtractor.openInput``.
        classs: dotted class path the include/exclude attributes are
                matched against.
        target: path of the output file; it is opened in "w" mode, so
                existing content is replaced.

    Returns 0 once all elements have been processed.

    Raises EnvironmentError when an element other than "text" is found.
    """
    source = XmlExtractor()
    source.openInput(file)

    # The context manager closes (and flushes) the output file on normal
    # completion as well as when an error aborts the loop.
    with open(target, "w") as out_file:
        source.requireChild("document")
        (ctl, elem, value, attrs) = source.extractElement(XMLE.EC_BEG)
        while ctl != XMLE.EC_END:
            if elem != "text":
                raise EnvironmentError("<text> node expected")
            if is_for_print(classs, attrs):
                out_file.write(value + "\n")
            # The control code of the previous call is fed back to advance.
            (ctl, elem, value, attrs) = source.extractElement(ctl)
    return 0
64
65