<?xml version="1.0"?>
<!DOCTYPE bioml SYSTEM "bioml.dtd">
<bioml label="Insulin, gene and protein structure">
	&paragraph;This BIOML file contains a simple set of information about the
	protein insulin. It is not meant to be an exhaustive study of insulin. Instead
	it is meant to be a demonstration of the organization of a BIOML file. To advance to the
	next item, either select it with the mouse or press the "DOWN" button.
	<organism label="Homo sapiens (human)">
		<chromosome label="Chromosome 11" number="11">
			&paragraph; 
			The chromosome entry indicates that the locus that contains the insulin gene
			has been localized to the number 11 chromosome in humans. All of the
			entries linked to this entry are localized on this chromosome.
			<locus label="HUMINS locus">
				 &paragraph;
				 The HUMINS locus contains all of the sequence information necessary to code for
				 insulin. A locus can be completely known, or only fragments may be known. In
				 many BIOML files, the locus will contain a gene (or genes) of interest.
				<reference label="Sequence databases">
					&paragraph;
					The locus and gene sequence information used in this file were retrieved from
					these database entries. To see the entries, either press the "RIGHT" arrow key
					to open up this entry, or double click on it with the mouse. To close this entry
					again, either use the "LEFT" arrow key or double click on it again.
					&paragraph;
					To retrieve the full database entry, move the cursor to the entry you want, and
					then press the "RIGHT" button, or double click on the entry. The database entry
					contains a lot of information, but it is laid out in a rather cryptic fashion. If
					you are interested, you can read through the files and try to figure out what the
					entries all mean, or you can just continue and get the information from the BIOML
					file.
					&paragraph;
					Move the cursor down to the "Insulin gene" entry to see the sequence of the 
					gene. The exons will be highlighted in alternating colors. Open up the gene entry
					to see the organization of the gene and select any entry to see that region
					highlighted in the display.
					<!-- Both links point at the same accession; it is written in upper case in each. -->
					<db_entry label="Genbank sequence" entry="V00565" format="GENBANK"/>
					<db_entry label="EMBL sequence" format="EMBL" entry="V00565"/>
				</reference>
				<reference label="Literature references">
					 &paragraph;
					This entry contains links to Medline abstracts that are relevant to the
					primary structure of insulin. Open up the entries, select one you want to see
					and then either use the "RIGHT" arrow or a double click to fetch the abstract
					from Medline. You need a connection to the Internet to use these entries. In
					general, any yellow icon that changes its shape when you select it will
					connect you with the Internet to get information.
					<db_entry label="Insulin gene sequence" format="MEDLINE" entry="80120725"/>
					<db_entry label="Insulin mRNA sequence" format="MEDLINE" entry="80236313"/>
					<db_entry label="Localization to Chromosome 11" format="MEDLINE" entry="93364428"/>
				</reference>
				<gene label="Insulin gene">
					<dna label="Complete HUMINS sequence" start="1" end="4992">
						        1 ctcgaggggc ctagacattg ccctccagag agagcaccca acaccctcca ggcttgaccg
						       61 gccagggtgt ccccttccta ccttggagag agcagcccca gggcatcctg cagggggtgc
						      121 tgggacacca gctggccttc aaggtctctg cctccctcca gccaccccac tacacgctgc
						      181 tgggatcctg gatctcagct ccctggccga caacactggc aaactcctac tcatccacga
						      241 aggccctcct gggcatggtg gtccttccca gcctggcagt ctgttcctca cacaccttgt
						      301 tagtgcccag cccctgaggt tgcagctggg ggtgtctctg aagggctgtg agcccccagg
						      361 aagccctggg gaagtgcctg ccttgcctcc ccccggccct gccagcgcct ggctctgccc
						      421 tcctacctgg gctcccccca tccagcctcc ctccctacac actcctctca aggaggcacc
						      481 catgtcctct ccagctgccg ggcctcagag cactgtggcg tcctggggca gccaccgcat
						      541 gtcctgctgt ggcatggctc agggtggaaa gggcggaagg gaggggtcct gcagatagct
						      601 ggtgcccact accaaacccg ctcggggcag gagagccaaa ggctgggtgt gtgcagagcg
						      661 gccccgagag gttccgaggc tgaggccagg gtgggacata gggatgcgag gggccggggc
						      721 acaggatact ccaacctgcc tgcccccatg gtctcatcct cctgcttctg ggacctcctg
						      781 atcctgcccc tggtgctaag aggcaggtaa ggggctgcag gcagcagggc tcggagccca
						      841 tgccccctca ccatgggtca ggctggacct ccaggtgcct gttctgggga gctgggaggg
						      901 ccggaggggt gtaccccagg ggctcagccc agatgacact atgggggtga tggtgtcatg
						      961 ggacctggcc aggagagggg agatgggctc ccagaagagg agtgggggct gagagggtgc
						     1021 ctggggggcc aggacggagc tgggccagtg cacagcttcc cacacctgcc cacccccaga
						     1081 gtcctgccgc cacccccaga tcacacggaa gatgaggtcc gagtggcctg ctgaggactt
						     1141 gctgcttgtc cccaggtccc caggtcatgc cctccttctg ccaccctggg gagctgaggg
						     1201 cctcagctgg ggctgctgtc ctaaggcagg gtgggaacta ggcagccagc agggagggga
						     1261 cccctccctc actcccactc tcccaccccc accaccttgg cccatccatg gcggcatctt
						     1321 gggccatccg ggactgggga caggggtcct ggggacaggg gtccggggac agggtcctgg
						     1381 ggacaggggt gtggggacag gggtctgggg acaggggtgt ggggacaggg gtgtggggac
						     1441 aggggtctgg ggacaggggt gtggggacag gggtccgggg acaggggtgt ggggacaggg
						     1501 gtctggggac aggggtgtgg ggacaggggt gtggggacag gggtctgggg acaggggtgt
						     1561 ggggacaggg gtcctgggga caggggtgtg gggacagggg tgtggggaca ggggtgtggg
						     1621 gacaggggtg tggggacagg ggtcctgggg ataggggtgt ggggacaggg gtgtggggac
						     1681 aggggtcccg gggacagggg tgtggggaca ggggtgtggg gacaggggtc ctggggacag
						     1741 gggtctgagg acaggggtgt gggcacaggg gtcctgggga caggggtcct ggggacaggg
						     1801 gtcctgggga caggggtctg gggacagcag cgcaaagagc cccgccctgc agcctccagc
						     1861 tctcctggtc taatgtggaa agtggcccag gtgagggctt tgctctcctg gagacatttg
						     1921 cccccagctg tgagcaggga caggtctggc caccgggccc ctggttaaga ctctaatgac
						     1981 ccgctggtcc tgaggaagag gtgctgacga ccaaggagat cttcccacag acccagcacc
						     2041 agggaaatgg tccggaaatt gcagcctcag cccccagcca tctgccgacc cccccacccc
						     2101 gccctaatgg gccaggcggc aggggttgac aggtagggga gatgggctct gagactataa
						     2161 agccagcggg ggcccagcag ccctcagccc tccaggacag gctgcatcag aagaggccat
						     2221 caagcaggtc tgttccaagg gcctttgcgt caggtgggct cagggttcca gggtggctgg
						     2281 accccaggcc ccagctctgc agcagggagg acgtggctgg gctcgtgaag catgtggggg
						     2341 tgagcccagg ggccccaagg cagggcacct ggccttcagc ctgcctcagc cctgcctgtc
						     2401 tcccagatca ctgtccttct gccatggccc tgtggatgcg cctcctgccc ctgctggcgc
						     2461 tgctggccct ctggggacct gacccagccg cagcctttgt gaaccaacac ctgtgcggct
						     2521 cacacctggt ggaagctctc tacctagtgt gcggggaacg aggcttcttc tacacaccca
						     2581 agacccgccg ggaggcagag gacctgcagg gtgagccaac cgcccattgc tgcccctggc
						     2641 cgcccccagc caccccctgc tcctggcgct cccacccagc atgggcagaa gggggcagga
						     2701 ggctgccacc cagcaggggg tcaggtgcac ttttttaaaa agaagttctc ttggtcacgt
						     2761 cctaaaagtg accagctccc tgtggcccag tcagaatctc agcctgagga cggtgttggc
						     2821 ttcggcagcc ccgagataca tcagagggtg ggcacgctcc tccctccact cgcccctcaa
						     2881 acaaatgccc cgcagcccat ttctccaccc tcatttgatg accgcagatt caagtgtttt
						     2941 gttaagtaaa gtcctgggtg acctggggtc acagggtgcc ccacgctgcc tgcctctggg
						     3001 cgaacacccc atcacgcccg gaggagggcg tggctgcctg cctgagtggg ccagacccct
						     3061 gtcgccagcc tcacggcagc tccatagtca ggagatgggg aagatgctgg ggacaggccc
						     3121 tggggagaag tactgggatc acctgttcag gctcccactg tgacgctgcc ccggggcggg
						     3181 ggaaggaggt gggacatgtg ggcgttgggg cctgtaggtc cacacccagt gtgggtgacc
						     3241 ctccctctaa cctgggtcca gcccggctgg agatgggtgg gagtgcgacc tagggctggc
						     3301 gggcaggcgg gcactgtgtc tccctgactg tgtcctcctg tgtccctctg cctcgccgct
						     3361 gttccggaac ctgctctgcg cggcacgtcc tggcagtggg gcaggtggag ctgggcgggg
						     3421 gccctggtgc aggcagcctg cagcccttgg ccctggaggg gtccctgcag aagcgtggca
						     3481 ttgtggaaca atgctgtacc agcatctgct ccctctacca gctggagaac tactgcaact
						     3541 agacgcagcc tgcaggcagc cccacacccg ccgcctcctg caccgagaga gatggaataa
						     3601 agcccttgaa ccagccctgc tgtgccgtct gtgtgtcttg ggggccctgg gccaagcccc
						     3661 acttcccggc actgttgtga gcccctccca gctctctcca cgctctctgg gtgcccacag
						     3721 gtgccaacgc caggcaggcc cagcatgcag tggctctccc caaagcggcc atgcctgttg
						     3781 gctgcctgct gcccccaccc tgtggctcag ggtccagtat gggagcttcg ggggtctctg
						     3841 aggggccagg gatggtgggg ccactgagaa gtgactctgt cagtagccga cctggagtcc
						     3901 ccagagacct tgttcaggaa agggaatgag aacattccag caattttccc cccacctagc
						     3961 cctcccaggt tctattttta gagttatttc tgatggagtc cctgtggagg gaggaggctg
						     4021 ggctgaggga gggggtcctg cagggcgggg ggctgggaag gtggggagag gctgccgaga
						     4081 gccacccgct atccccagct ctgggcagcc ccgggacagt cacacaccct ggcctcgcgg
						     4141 cccaagctgg cagccgtctg cagccacagc ttatgccagc ccaggtccag ccagacacct
						     4201 gagggaccca ctggtgcctt ggaggaagca ggagaggtca gatggcacca tgagctgggg
						     4261 caggtgcagg gaccgtggca gcacctggca gggcctcaga acccatgcct tgggcacccc
						     4321 ggccatgagg ccctgaggat tgcagcccaa gagaagcagg gaacgccagg gccacagggg
						     4381 cagagaccag gccagggtcc cttgcggccc ttagcccacc ccctcccagt aagcaggggc
						     4441 tgcttggcta ggcttccttt tgctacagac ctgctgctca cccagaggcc cacgggccct
						     4501 agtgacaagg tcgttgtggc tccaggtcct tgggggtcct gacacagagc ctcttctgca
						     4561 gcacccctga ggacagggtg ctccgctggg cacccagcct agtgggcaga cgagaaccta
						     4621 ggggctgcct gggcctactg tggcctggga ggtcagcggg tgaccctagc taccctgtgg
						     4681 ctgggccagt ctgcctgcca cccaggccaa accaatctgc acctttcctg agagctccac
						     4741 ccagggctgg gctggggatg gctgggcctg gggctggcat gggctgtggc tgcagaccac
						     4801 tgccagcttg ggcctcgagg ccaggagctc accctccagc tgccccgcct ccagagtggg
						     4861 ggccagggct gggcaggcgg gtggacggcc ggacactggc cccggaagag gagggaggcg
						     4921 gtggctggga tcggcagcag ccgtccatgg gaacacccag ccggccccac tcgcacgggt
						     4981 agagacaggc gc
						<ddomain label="flanking domain" start="1" end="2185"/>
						<ddomain label="polymorphic domain" start="1340" end="1823"/>
						<ddomain label="Signal peptide" start="2424" end="2495"/>
						<ddomain label="Chain B" start="2496" end="2585"/>
						<ddomain label="Chain C(1)" start="2586" end="2610"/>
						<ddomain label="Chain C(2)" start="3397" end="3476"/>
						<ddomain label="Chain A" start="3477" end="3539"/>
						<exon label="Exon 1" start="2186" end="2227"/>
						<intron label="Intron 1" start="2228" end="2406"/>
						<exon label="Exon 2" start="2407" end="2610"/>
						<intron label="Intron 2" start="2611" end="3396"/>
						<exon label="Exon 3" start="3397" end="3615"/>
						<!-- 3' flanking region: starts on the base after the end of Exon 3 (3615). -->
						<ddomain label="flanking domain" start="3616" end="4992"/>
					</dna>
				</gene>
			</locus>
		</chromosome>
		<tissue label="Pancreas and plasma tissue">
			&paragraph;
			In the same way that the 'chromosome' entry above contains entries
			that describe things located on that chromosome, 'tissue' entries
			contain things found in that type of tissue. It is possible with
			BIOML to specify the location of an object in an organism down to
			the subcellular particle that contains it.
			&paragraph;
			In the case of insulin, it is made in the pancreas and secreted into
			the plasma (the cell-free portion of blood). 
			<protein label="Insulin protein" comp="6xS[1]">
				&paragraph;
				Insulin is probably the most well-known protein. Insulin stimulates the
				transport of glucose from the outside to the inside of cells. This function
				allows it to regulate the level of glucose present in the blood. A reduced
				ability to produce insulin produces diabetes, which can only be treated by
				the injection of insulin.
				&paragraph;
				All of the entries that are attached to this one are about insulin. Insulin
				is initially translated as a prepropeptide that is larger than the final, active
				form of insulin. The signal peptide is clipped off almost immediately. The protein 
				then folds and the disulphide bonds are formed. Then the C-peptide is removed and
				the two chains that are left attached by disulphide bonds comprise active insulin.
				&paragraph;
				By scrolling down through the entries below, you will be able to see the various
				chains and their linkages. The red lines connect cysteine residues involved in 
				disulphide bonds. The 'squiggles' underneath the sequence indicate the regions
				that are known to be alpha helices. Beta sheet strands are indicated by colored
				thick lines. Turns are indicated by thin lines.
				<subunit id="1" label="Mature insulin: A and B chains" comp="1xP[1]D[2]+1xP[1]D[4]">
					<reference label="Sequence databases">
						 &paragraph;
						 The sequence shown was based on information from sequence databases. The database information
						 can be retrieved using the entries under this heading, but they are not really
						 designed to be read by humans very easily. If you have never looked at one before,
						 take a look and see what is there. All of the information can be easily captured
						 in a BIOML file, but because this file is a simplified demonstration, many
						 pieces of information have been purposely left out of this file.
						<db_entry label="SWISSPROT entry" id="1" entry="INS_HUMAN" format="SWISSPROT"/>
						<db_entry label="PIR entry" id="2" entry="A01579" format="PIR"/>
					</reference>
					<peptide label="Preproinsulin" id="1" end="110" start="1">
						<domain label="Signal" id="1" end="24" start="1">
							            MALWMRLLPLLALLALWGPDPAAA
						</domain>
						<domain label="B Chain" id="2" end="54" start="25">
							FVNQHLCGSHLVEALYLVCGERGFFYTPKT
							<aa at="31" type="C" to="96"/>
							<aa at="34" type="H">
								<avariant type="D"/>
							</aa>
							<aa at="43" type="C" to="109"/>
							<aa at="48" type="F">
								<avariant type="S"/>
							</aa>
							<aa at="49" type="F">
								<avariant type="L"/>
							</aa>
						</domain>
						<domain label="C Chain (propeptide)" id="3" end="89" start="55">
							RREAEDLQVGQVELGGGPGAGSLQPLALEGSLQKR
							<aa at="89" type="R">
								<avariant type="H"/>
								<avariant type="L"/>
							</aa>
						</domain>
						<domain label="A Chain" id="4" end="110" start="90">
							GIVEQCCTSICSLYQLENYCN
							<aa at="109" type="C" to="43"/>
							<aa at="92" type="V">
								<avariant type="L"/>
							</aa>
							<aa at="95" type="C" to="100"/>
							<aa at="96" type="C" to="31"/>
							<aa at="100" type="C" to="95"/>
						</domain>
						<domain type="TURN" start="32" end="32"/>
						<domain type="HELIX" start="33" end="46"/>
						<domain type="STRAND" start="48" end="50"/>
						<domain type="HELIX" start="91" end="95"/>
						<domain type="TURN" start="96" end="97"/>
						<domain type="HELIX" start="102" end="108"/>
						<domain type="STRAND" start="109" end="109"/>
					</peptide>
				</subunit>
			</protein>
		</tissue>
	</organism>
	
	<!-- 
	<file label="WWW_insulin" URL="http://www.altavista.digital.com/cgi-bin/query?pg=q&amp;kl=XX&amp;q=insulin+structure" format="HTML">
		&paragraph;
		This entry just performs a simple WWW search for information about insulin. This particular
		search isn't terribly useful, but it demonstrates a simple way to store something that points
		towards a search engine.
	</file>
	<copyright label="1998 ProteoMetrics, LLC">
		&cr;Copyright &copyright; 1998 ProteoMetrics, LLC. All rights reserved.	</copyright>
	-->
	</bioml>

