-
Notifications
You must be signed in to change notification settings - Fork 0
/
interpro.dtd
124 lines (124 loc) · 5.35 KB
/
interpro.dtd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
<!-- edited with XML Spy v4.4 U (http://www.xmlspy.com) by ALEX KANAPIN (EMBL OUTSTATION THE EBI) -->
<!-- Root element of InterPro database-->
<!--Any number of release, interpro and deleted_entries children may appear, in any order; an empty interprodb is also valid.-->
<!ELEMENT interprodb (release | interpro | deleted_entries)*>
<!-- Release information-->
<!--A release holds one or more dbinfo children.-->
<!ELEMENT release (dbinfo)+>
<!--The dbinfo block is used to store release information about the referenced databases, either member databases such as PFAM, or databases that are used in the production of InterPro such as TrEMBL. At least one of the version or file_date attributes should be present (NOTE(review): this comment previously said "release or date" attributes; presumably the version and file_date attributes declared below are meant, TODO confirm). The DTD itself does not enforce this rule, since both attributes are #IMPLIED.-->
<!ELEMENT dbinfo EMPTY>
<!--Only dbname is required; version, entry_count and file_date are optional free-text values.-->
<!ATTLIST dbinfo
dbname NMTOKEN #REQUIRED
version CDATA #IMPLIED
entry_count CDATA #IMPLIED
file_date CDATA #IMPLIED
>
<!--The abstract, a manually curated free text area containing a summary of the current state of knowledge about the patterns that make up this InterPro entry. Layout markup within this block is converted to XML literal characters during the post-processing of the Oracle dump.-->
<!--Parameter entity listing the mixed content (character data plus inline and layout elements) shared by the abstract and by the markup elements declared below.-->
<!ENTITY % tags "#PCDATA | cite | db_xref | taxon | reaction | p | ul | li | ol | sup | sub | pre | i | b">
<!--Each of the following elements accepts any mix of text and the elements named in the tags entity, so the markup may nest freely.-->
<!ELEMENT abstract ( %tags; )*>
<!ELEMENT p ( %tags; )*>
<!ELEMENT ul ( %tags; )*>
<!ELEMENT li ( %tags; )*>
<!ELEMENT ol ( %tags; )*>
<!ELEMENT sup ( %tags; )*>
<!ELEMENT pre ( %tags; )*>
<!--NOTE(review): i and sub accept character data only, unlike the other markup elements here, which accept the full tags mix; confirm this difference is intentional.-->
<!ELEMENT i (#PCDATA)>
<!ELEMENT sub (#PCDATA)>
<!ELEMENT b ( %tags; )*>
<!--Text-only fields: author_list and book_title are children of publication, category is a child of classification.-->
<!ELEMENT author_list (#PCDATA)>
<!ELEMENT book_title (#PCDATA)>
<!ELEMENT category (#PCDATA)>
<!--One or more references to other InterPro entries.-->
<!ELEMENT child_list (rel_ref)+>
<!--Empty citation marker usable inside abstract markup. The idref attribute is declared as CDATA rather than IDREF, so a validating parser does not check that it matches any publication id.-->
<!ELEMENT cite EMPTY>
<!ATTLIST cite
idref CDATA #REQUIRED
>
<!--One or more classification entries.-->
<!ELEMENT class_list (classification+)>
<!--Represents classification in the Gene Ontology (www.geneontology.org), a hierarchical classification of gene product location, encapsulation and function.-->
<!--Each classification holds exactly one category followed by exactly one description, in that order.-->
<!ELEMENT classification (category, description)>
<!--Both attributes are required free-text values.-->
<!ATTLIST classification
id CDATA #REQUIRED
class_type CDATA #REQUIRED
>
<!--One or more references to other InterPro entries.-->
<!ELEMENT contains (rel_ref)+>
<!--Empty cross-reference element. The required db attribute must be one of the enumerated database tokens below; the tokens are case-sensitive, and both TIGRFAMs and TIGRFAMS are listed as separate accepted values. All other attributes are optional free text.-->
<!ELEMENT db_xref EMPTY>
<!ATTLIST db_xref
db (GENPROP | INTACT | BLOCKS | CATH | CAZY | COG | COMe | EC | GO | INTERPRO | IUPHAR | MEROPS | MSDsite | PANDIT | PDB | PFAM | PIRSF | PRINTS | PRODOM | PROFILE | PROSITE | PROSITEDOC | PUBMED | SCOP | SMART | SMODEL | SSF | SWISSPROT | TIGRFAMs | TIGRFAMS | TREMBL | PANTHER | GENE3D | HAMAP |TC | PRIAM ) #REQUIRED
version CDATA #IMPLIED
dbkey CDATA #IMPLIED
name CDATA #IMPLIED
protein_count CDATA #IMPLIED
>
<!--This element contains the accession number of a single deleted InterPro entry (held in the required id attribute). It may carry zero or more description children.-->
<!ELEMENT del_ref (description)*>
<!ATTLIST del_ref
id CDATA #REQUIRED
>
<!--If present, this contains a list of deleted IPRs (at least one del_ref).-->
<!ELEMENT deleted_entries (del_ref)+>
<!--Generic description node, just contains a block of text. Meaning depends upon relative context.-->
<!ELEMENT description (#PCDATA)>
<!--Examples of this InterPro entry hitting proteins from SWISS-PROT and TrEMBL. The list may be empty; each example is a mix of text and db_xref cross-references.-->
<!ELEMENT example_list (example)*>
<!ELEMENT example (#PCDATA | db_xref)*>
<!--Lists of db_xref cross-references; each list must contain at least one entry.-->
<!ELEMENT external_doc_list (db_xref)+>
<!ELEMENT structure_db_links (db_xref)+>
<!--One or more taxon_data entries. The attributes of taxon_data are declared in a separate ATTLIST later in this file.-->
<!ELEMENT taxonomy_distribution (taxon_data)+>
<!ELEMENT taxon_data (#PCDATA)>
<!--One or more references to other InterPro entries.-->
<!ELEMENT found_in (rel_ref)+>
<!--A single InterPro entry. The content model is a repeated choice, so the DTD does not constrain the order or the number of occurrences of the children listed here; an entry with no children at all is also valid.-->
<!ELEMENT interpro (name | sec_list | abstract | class_list | example_list | pub_list | external_doc_list | member_list | parent_list | child_list | contains | found_in | structure_db_links | taxonomy_distribution)*>
<!--All four attributes are required. short_name carries the short name of the entry; the name child element holds the longer description.-->
<!ATTLIST interpro
  id            CDATA   #REQUIRED
  type          NMTOKEN #REQUIRED
  short_name    CDATA   #REQUIRED
  protein_count CDATA   #REQUIRED
>
<!--Attributes of the taxon_data element (the element itself is declared earlier in this file). Both are required. Note the spelling proteins_count here, which differs from the protein_count attribute used on interpro and db_xref.-->
<!ATTLIST taxon_data
name CDATA #REQUIRED
proteins_count CDATA #REQUIRED
>
<!--Text-only journal field, a child of publication.-->
<!ELEMENT journal (#PCDATA)>
<!--Empty element whose optional attributes give the pages, volume and issue of a publication.-->
<!ELEMENT location EMPTY>
<!ATTLIST location
pages CDATA #IMPLIED
volume CDATA #IMPLIED
issue CDATA #IMPLIED
>
<!--One or more db_xref cross-references.-->
<!ELEMENT member_list (db_xref)+>
<!--This is actually a description of the entry, and not the name. The short name is held in the attribute list of the 'interpro' element in order to make parsing through the file for a given name more efficient.-->
<!ELEMENT name (#PCDATA)>
<!--Note - changed name of node from 'parlist' to 'parent_list'-->
<!--Unlike child_list, a parent_list holds exactly one rel_ref.-->
<!ELEMENT parent_list (rel_ref)>
<!--NOTE(review): protein is not referenced by any content model visible in this file; confirm whether it is still needed.-->
<!ELEMENT protein EMPTY>
<!ATTLIST protein
id CDATA #REQUIRED
>
<!--A list of publications used within this InterPro entry; the list may be empty.-->
<!ELEMENT pub_list (publication)*>
<!--Represents a single published source of data used by the InterPro entry. It is referenced by the 'cite' tag within the abstract and various other places. This replaces the over-loaded publication tag that we had before and allows a much cleaner looking schema.-->
<!--The content model is a repeated choice, so the DTD does not constrain the order or the number of occurrences of the children listed here; a publication with no children is also valid.-->
<!ELEMENT publication (author_list | title | db_xref | journal | book_title | location | url | doi_url | year)*>
<!--The id attribute is optional free text.-->
<!ATTLIST publication
  id CDATA #IMPLIED
>
<!--Inline markup element usable inside abstract text; it accepts any mix of text and the elements named in the tags entity.-->
<!ELEMENT reaction ( %tags; )*>
<!--This is a reference to another InterPro entry-->
<!--ipr_ref is required and type is optional. ipr_ref is declared as CDATA, so a validating parser does not check that it matches an existing interpro id.-->
<!ELEMENT rel_ref EMPTY>
<!ATTLIST rel_ref
ipr_ref CDATA #REQUIRED
type CDATA #IMPLIED
>
<!--This block stores information that is specific to this release of the XML file.-->
<!--NOTE(review): the comment above appears to describe the release element rather than sec_ac; confirm its intended placement.-->
<!--A single secondary accession number, held in the required acc attribute.-->
<!ELEMENT sec_ac EMPTY>
<!ATTLIST sec_ac
acc CDATA #REQUIRED
>
<!--Secondary accession numbers are stored in this list (at least one sec_ac).-->
<!ELEMENT sec_list (sec_ac+)>
<!--Inline text element usable inside abstract markup, with an optional tax_id attribute.-->
<!ELEMENT taxon (#PCDATA)>
<!ATTLIST taxon
tax_id CDATA #IMPLIED
>
<!--Text-only title field, a child of publication.-->
<!ELEMENT title (#PCDATA)>
<!--NOTE(review): the type element is not referenced by any content model visible in this file (interpro and rel_ref use an attribute named type instead); confirm whether it is still needed.-->
<!ELEMENT type (#PCDATA)>
<!--This should contain a URL for an online resource relevant to the given publication. Note that the type is now restricted to the w3c schema uriReference, and that any contents must therefore comply with the definition for this type. This DTD declares url as plain character data, so that restriction is not enforced by DTD validation.-->
<!ELEMENT url (#PCDATA)>
<!--Text-only doi_url and year fields, both children of publication.-->
<!ELEMENT doi_url (#PCDATA)>
<!ELEMENT year (#PCDATA)>