001    /* DTD.java --
002       Copyright (C) 2005 Free Software Foundation, Inc.
003    
004    This file is part of GNU Classpath.
005    
006    GNU Classpath is free software; you can redistribute it and/or modify
007    it under the terms of the GNU General Public License as published by
008    the Free Software Foundation; either version 2, or (at your option)
009    any later version.
010    
011    GNU Classpath is distributed in the hope that it will be useful, but
012    WITHOUT ANY WARRANTY; without even the implied warranty of
013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014    General Public License for more details.
015    
016    You should have received a copy of the GNU General Public License
017    along with GNU Classpath; see the file COPYING.  If not, write to the
018    Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
019    02110-1301 USA.
020    
021    Linking this library statically or dynamically with other modules is
022    making a combined work based on this library.  Thus, the terms and
023    conditions of the GNU General Public License cover the whole
024    combination.
025    
026    As a special exception, the copyright holders of this library give you
027    permission to link this library with independent modules to produce an
028    executable, regardless of the license terms of these independent
029    modules, and to copy and distribute the resulting executable under
030    terms of your choice, provided that you also meet, for each linked
031    independent module, the terms and conditions of the license of that
032    module.  An independent module is a module which is not derived from
033    or based on this library.  If you modify this library, you may extend
034    this exception to your version of the library, but you are not
035    obligated to do so.  If you do not wish to do so, delete this
036    exception statement from your version. */
037    
038    
039    package javax.swing.text.html.parser;
040    
041    import java.io.DataInputStream;
042    import java.io.EOFException;
043    import java.io.IOException;
044    import java.io.ObjectInputStream;
045    import java.lang.reflect.Field;
046    import java.lang.reflect.Modifier;
047    import java.util.BitSet;
048    import java.util.Hashtable;
049    import java.util.StringTokenizer;
050    import java.util.Vector;
051    
052    /**
053     * <p>Representation or the SGML DTD document.
054     * Provides basis for describing a syntax of the
055     * HTML documents. The fields of this class are NOT initialized in
056     * constructor. You need to do this separately before passing this data
057     * structure to the HTML parser. The subclasses with the fields, pre-
058     * initialized, for example, for HTML 4.01, can be available only between
059     * the implementation specific classes
060     * ( for example, {@link gnu.javax.swing.text.html.parser.HTML_401F }
061     * in this implementation).</p>
062     * <p>
063     * If you need more information about SGML DTD documents,
064     * the author suggests to read SGML tutorial on
065     * <a href="http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html"
066     * >http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html</a>.
067     * We also recommend Goldfarb C.F (1991) <i>The SGML Handbook</i>,
068     * Oxford University Press, 688 p, ISBN: 0198537379.
069     * </p>
070     * <p>
071     * Warning: the html, head and other tag fields will only be automatically
072     * assigned if the VM has the correctly implemented reflection mechanism.
073     * As these fields are not used anywhere in the implementation, not
074     * exception will be thrown in the opposite case.
075     * </p>
076     *
077     * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
078     */
079    public class DTD
080      implements DTDConstants
081    {
082      /**
083       * The version of the persistent data format.
084       * @specnote This was made <code>final</code> in 1.5.
085       */
086      public static final int FILE_VERSION = 1;
087    
088      /**
089       * The table of existing available DTDs.
090       */
091      static Hashtable<String,DTD> dtdHash = new Hashtable<String,DTD>();
092    
093      /**
094       * The applet element for this DTD.
095       */
096      public Element applet;
097    
098      /**
099       * The base element for this DTD.
100       */
101      public Element base;
102    
103      /**
104       * The body element for this DTD.
105       */
106      public Element body;
107    
108      /**
109       * The head element for this DTD.
110       */
111      public Element head;
112    
113      /**
114       * The html element for this DTD.
115       */
116      public Element html;
117    
118      /**
119       * The isindex element of for this DTD.
120       */
121      public Element isindex;
122    
123      /**
124       * The meta element for this DTD.
125       */
126      public Element meta;
127    
128      /**
129       * The p element for this DTD.
130       */
131      public Element p;
132    
133      /**
134       * The param element for this DTD.
135       */
136      public Element param;
137    
138      /**
139       * The pcdata for this DTD.
140       */
141      public Element pcdata;
142    
143      /**
144       * The title element for this DTD.
145       */
146      public Element title;
147    
148      /**
149       * The element for accessing all DTD elements by name.
150       */
151      public Hashtable<String,Element> elementHash =
152        new Hashtable<String,Element>();
153    
154      /**
155       * The entity table for accessing all DTD entities by name.
156       */
157      public Hashtable<Object, Entity> entityHash = new Hashtable<Object, Entity>();
158    
159      /**
160       *  The name of this DTD.
161       */
162      public String name;
163    
164      /**
165       * Contains all elements in this DTD. The
166       * javax.swing.text.html.parser.Element#index field of all elements
167       * in this vector is set to the element position in this vector.
168       */
169      public Vector<Element> elements = new Vector<Element>();
170    
171      /** Create a new DTD with the specified name. */
172      protected DTD(String a_name)
173      {
174        name = a_name;
175      }
176    
177      /** Get this DTD by name. The current implementation
178       * only looks in the internal table of DTD documents. If no corresponding
179       * entry is found, the new entry is created, placed into
180       * the table and returned. */
181      public static DTD getDTD(String name)
182                        throws IOException
183      {
184        DTD d = dtdHash.get(name);
185    
186        if (d == null)
187          {
188            d = new DTD(name);
189            dtdHash.put(d.name, d);
190          }
191    
192        return d;
193      }
194    
195      /**
196       * Get the element by the element name. If the element is not yet
197       * defined, it is newly created and placed into the element table.
198       * If the element name matches (ingoring case) a public non static
199       * element field in this class, this field is assigned to the value
200       * of the newly created element.
201       */
202      public Element getElement(String element_name)
203      {
204        return newElement(element_name);
205      }
206    
207      /**
208       * Get the element by the value of its
209       * {@link javax.swing.text.html.parser.Element#index} field.
210       */
211      public Element getElement(int index)
212      {
213        return elements.get(index);
214      }
215    
216      /**
217       * Get the entity with the given identifier.
218       * @param id that can be returned by
219       * {@link javax.swing.text.html.parser.Entity#name2type(String an_entity)}
220       * @return The entity from this DTD or null if there is no entity with
221       * such id or such entity is not present in the table of this instance.
222       */
223      public Entity getEntity(int id)
224      {
225        String name = Entity.mapper.get(id);
226    
227        if (name != null)
228          return entityHash.get(name);
229        else
230          return null;
231      }
232    
233      /**
234       * Get the named entity by its name.
235       */
236      public Entity getEntity(String entity_name)
237      {
238        return entityHash.get(entity_name);
239      }
240    
241      /**
242       * Get the name of this instance of DTD
243       */
244      public String getName()
245      {
246        return name;
247      }
248    
249      /**
250       * Creates, adds into the entity table and returns the
251       * character entity like <code>&amp;lt;</code>
252       *  (means '<code>&lt;</code>' );
253       * @param name The entity name (without heading &amp; and closing ;)
254       * @param type The entity type
255       * @param character The entity value (single character)
256       * @return The created entity
257       */
258      public Entity defEntity(String name, int type, int character)
259      {
260        Entity e = newEntity(name, type);
261        e.data = new char[] { (char) character };
262        return e;
263      }
264    
265      /**
266       * Define the attributes for the element with the given name.
267       * If the element is not exist, it is created.
268       * @param forElement
269       * @param attributes
270       */
271      public void defineAttributes(String forElement, AttributeList attributes)
272      {
273        Element e = elementHash.get(forElement.toLowerCase());
274    
275        if (e == null)
276          e = newElement(forElement);
277    
278        e.atts = attributes;
279      }
280    
281      /**
282       * Defines the element and adds it to the element table. Sets the
283       * <code>Element.index</code> field to the value, unique for this
284       * instance of DTD. If the element with the given name already exists,
285       * replaces all other its settings by the method argument values.
286       * @param name the name of the element
287       * @param type the type of the element
288       * @param headless true if the element needs no starting tag
289       * (should not occur in HTML).
290       * @param tailless true if the element needs no ending tag (like
291       * <code>&lt;hr&gt;</code>
292       * @param content the element content
293       * @param exclusions the set of elements that must not occur inside
294       * this element. The <code>Element.index</code> value defines which
295       * bit in this bitset corresponds to that element.
296       * @param inclusions the set of elements that can occur inside this
297       * element. the <code>Element.index</code> value defines which
298       * bit in this bitset corresponds to that element.
299       * @param attributes the element attributes.
300       * @return the newly defined element.
301       */
302      public Element defineElement(String name, int type, boolean headless,
303                                   boolean tailless, ContentModel content,
304                                   BitSet exclusions, BitSet inclusions,
305                                   AttributeList attributes
306                                  )
307      {
308        Element e = newElement(name);
309        e.type = type;
310        e.oStart = headless;
311        e.oEnd = tailless;
312        e.content = content;
313        e.exclusions = exclusions;
314        e.inclusions = inclusions;
315        e.atts = attributes;
316    
317        return e;
318      }
319    
320      /**
321       * Creates, intializes and adds to the entity table the new
322       * entity.
323       * @param name the name of the entity
324       * @param type the type of the entity
325       * @param data the data section of the entity
326       * @return the created entity
327       */
328      public Entity defineEntity(String name, int type, char[] data)
329      {
330        Entity e = newEntity(name, type);
331        e.data = data;
332    
333        return e;
334      }
335    
336      /** Place this DTD into the DTD table. */
337      public static void putDTDHash(String name, DTD dtd)
338      {
339        dtdHash.put(name, dtd);
340      }
341    
342      /**
343       * <p>Reads DTD from an archived format. This format is not standardized
344       * and differs between implementations.</p><p> This implementation
345       * reads and defines all entities and elements using
346       * ObjectInputStream. The elements and entities can be written into the
347       * stream in any order. The objects other than elements and entities
348       * are ignored.</p>
349       * @param stream A data stream to read from.
350       * @throws java.io.IOException If one is thrown by the input stream
351       */
352      public void read(DataInputStream stream)
353                throws java.io.IOException
354      {
355        ObjectInputStream oi = new ObjectInputStream(stream);
356        Object def;
357        try
358          {
359            while (true)
360              {
361                def = oi.readObject();
362                if (def instanceof Element)
363                  {
364                    Element e = (Element) def;
365                    elementHash.put(e.name.toLowerCase(), e);
366                    assignField(e);
367                  }
368                else if (def instanceof Entity)
369                  {
370                    Entity e = (Entity) def;
371                    entityHash.put(e.name, e);
372                  }
373              }
374          }
375        catch (ClassNotFoundException ex)
376          {
377            throw new IOException(ex.getMessage());
378          }
379        catch (EOFException ex)
380          {
381            // ok EOF
382          }
383      }
384    
385      /**
386       * Returns the name of this instance of DTD.
387       */
388      public String toString()
389      {
390        return name;
391      }
392    
393      /**
394       * Creates and returns new attribute (not an attribute list).
395       * @param name the name of this attribute
396       * @param type the type of this attribute (FIXED, IMPLIED or
397       * REQUIRED from <code>DTDConstants</code>).
398       * @param modifier the modifier of this attribute
399       * @param default_value the default value of this attribute
400       * @param allowed_values the allowed values of this attribute. The multiple
401       * possible values in this parameter are supposed to be separated by
402       * '|', same as in SGML DTD <code>&lt;!ATTLIST </code>tag. This parameter
403       * can be null if no list of allowed values is specified.
404       * @param atts the previous attribute of this element. This is
405       * placed to the field
406       * {@link javax.swing.text.html.parser.AttributeList#next },
407       * creating a linked list.
408       * @return The attributes.
409       */
410      protected AttributeList defAttributeList(String name, int type, int modifier,
411                                               String default_value,
412                                               String allowed_values,
413                                               AttributeList atts
414                                              )
415      {
416        AttributeList al = new AttributeList(name);
417        al.modifier = modifier;
418        al.value = default_value;
419        al.next = atts;
420    
421        if (allowed_values != null)
422          {
423            StringTokenizer st = new StringTokenizer(allowed_values, " \t|");
424            Vector<String> v = new Vector<String>(st.countTokens());
425    
426            while (st.hasMoreTokens())
427              v.add(st.nextToken());
428    
429            al.values = v;
430          }
431    
432        return al;
433      }
434    
435      /**
436       * Creates a new content model.
437       * @param type specifies the BNF operation for this content model.
438       * The valid operations are documented in the
439       * {@link javax.swing.text.html.parser.ContentModel#type }.
440       * @param content the content of this content model
441       * @param next if the content model is specified by BNF-like
442       * expression, contains the rest of this expression.
443       * @return The newly created content model.
444       */
445      protected ContentModel defContentModel(int type, Object content,
446                                             ContentModel next
447                                            )
448      {
449        ContentModel model = new ContentModel();
450        model.type = type;
451        model.next = next;
452        model.content = content;
453    
454        return model;
455      }
456    
457      /**
458       * Defines a new element and adds it to the element table.
459       * If the element alredy exists,
460       * overrides it settings with the specified values.
461       * @param name the name of the new element
462       * @param type the type of the element
463       * @param headless true if the element needs no starting tag
464       * @param tailless true if the element needs no closing tag
465       * @param content the element content.
466       * @param exclusions the elements that must be excluded from the
467       * content of this element, in all levels of the hierarchy.
468       * @param inclusions the elements that can be included as the
469       * content of this element.
470       * @param attributes the element attributes.
471       * @return the created or updated element.
472       */
473      protected Element defElement(String name, int type, boolean headless,
474                                   boolean tailless, ContentModel content,
475                                   String[] exclusions, String[] inclusions,
476                                   AttributeList attributes
477                                  )
478      {
479        // compute the bit sets
480        BitSet exclude = bitSet(exclusions);
481        BitSet include = bitSet(inclusions);
482    
483        Element e =
484          defineElement(name, type, headless, tailless, content, exclude, include,
485                        attributes
486                       );
487    
488        return e;
489      }
490    
491      /**
492       * Creates, intializes and adds to the entity table the new
493       * entity.
494       * @param name the name of the entity
495       * @param type the type of the entity
496       * @param data the data section of the entity
497       * @return the created entity
498       */
499      protected Entity defEntity(String name, int type, String data)
500      {
501        Entity e = newEntity(name, type);
502        e.data = data.toCharArray();
503    
504        return e;
505      }
506    
507      private void assignField(Element e)
508      {
509        String element_name = e.name;
510        try
511          {
512            // Assign the field via reflection.
513            Field f = getClass().getField(element_name.toLowerCase());
514            if ((f.getModifiers() & Modifier.PUBLIC) != 0)
515              if ((f.getModifiers() & Modifier.STATIC) == 0)
516                if (f.getType().isAssignableFrom(e.getClass()))
517                  f.set(this, e);
518          }
519        catch (IllegalAccessException ex)
520          {
521            unexpected(ex);
522          }
523        catch (NoSuchFieldException ex)
524          {
525            // This is ok.
526          }
527    
528        // Some virtual machines may still lack the proper
529        // implementation of reflection. As the tag fields
530        // are not used anywhere in this implementation,
531        // (and this class is also rarely used by the end user),
532        // it may be better not to crash everything by throwing an error
533        // for each case when the HTML parsing is required.
534        catch (Throwable t)
535          {
536            // This VM has no reflection mechanism implemented!
537            if (t instanceof OutOfMemoryError)
538              throw (Error) t;
539          }
540      }
541    
542      /**
543       * Create the bit set for this array of elements.
544       * The unknown elements are automatically defined and added
545       * to the element table.
546       * @param elements
547       * @return The bit set.
548       */
549      private BitSet bitSet(String[] elements)
550      {
551        BitSet b = new BitSet();
552    
553        for (int i = 0; i < elements.length; i++)
554          {
555            Element e = getElement(elements [ i ]);
556    
557            if (e == null)
558              e = newElement(elements [ i ]);
559    
560            b.set(e.index);
561          }
562    
563        return b;
564      }
565    
566      /**
567       * Find the element with the given name in the element table.
568       * If not find, create a new element with this name and add to the
569       * table.
570       * @param name the name of the element
571       * @return the found or created element.
572       */
573      private Element newElement(String name)
574      {
575        Element e = elementHash.get(name.toLowerCase());
576    
577        if (e == null)
578          {
579            e = new Element();
580            e.name = name;
581            e.index = elements.size();
582            elements.add(e);
583            elementHash.put(e.name.toLowerCase(), e);
584            assignField(e);
585          }
586        return e;
587      }
588    
589      /**
590       * Creates and adds to the element table the entity with an
591       * unitialized data section. Used internally.
592       * @param name the name of the entity
593       * @param type the type of the entity, a bitwise combination
594       * of GENERAL, PARAMETER, SYSTEM and PUBLIC.
595       *
596       * @return the created entity
597       */
598      private Entity newEntity(String name, int type)
599      {
600        Entity e = new Entity(name, type, null);
601        entityHash.put(e.name, e);
602        return e;
603      }
604    
605      private void unexpected(Exception ex)
606      {
607        throw new Error("This should never happen, report a bug", ex);
608      }
609    }