HTML Standard Tracker

Diff (omit for latest revision)
Filter

Short URL: http://html5.org/r/2322

File a bug

SVNBugCommentTime (UTC)
2322WF2: Form submission encoding algorithms and related fallout. Also, change 'int' to 'long' in IDL blocks.2008-10-11 01:37
Index: source
===================================================================
--- source	(revision 2321)
+++ source	(revision 2322)
@@ -20082,8 +20082,8 @@
 };
 
 interface <dfn>ImageData</dfn> {
-  readonly attribute unsigned long int <span title="dom-imagedata-width">width</span>;
-  readonly attribute unsigned long int <span title="dom-imagedata-height">height</span>;
+  readonly attribute unsigned long <span title="dom-imagedata-width">width</span>;
+  readonly attribute unsigned long <span title="dom-imagedata-height">height</span>;
   readonly attribute <span>CanvasPixelArray</span> <span title="dom-imagedata-data">data</span>;
 };
 
@@ -25545,8 +25545,8 @@
            attribute float <span title="dom-input-valueAsNumber">valueAsNumber</span>;
   readonly attribute <span>HTMLOptionElement</span> <span title="dom-input-selectedOption">selectedOption</span>;
 
-  void <span title="dom-input-stepUp">stepUp</span>(in int n);
-  void <span title="dom-input-stepDown">stepDown</span>(in int n);
+  void <span title="dom-input-stepUp">stepUp</span>(in long n);
+  void <span title="dom-input-stepDown">stepDown</span>(in long n);
 
   readonly attribute boolean <span title="dom-cva-willValidate">willValidate</span>;
   readonly attribute <span>ValidityState</span> <span title="dom-cva-validity">validity</span>;
@@ -31100,7 +31100,7 @@
    order</span>.</p></li>
 
    <li><p>Let the <var title="">form data set</var> be a list of
-   name/value pairs, initially empty.</p></li>
+   name-value-type tuples, initially empty.</p></li>
 
    <li id="constructing-form-data-set">
 
@@ -31153,6 +31153,10 @@
 
      </li>
 
+     <li><p>Let <var title="">type</var> be the value of the <code
+     title="">type</code> DOM attribute of <var
+     title="">field</var>.</p></li>
+
      <li>
 
       <p>If the <var title="">field</var> element is an
@@ -31191,12 +31195,14 @@
        user.</p></li>
 
        <li><p>Append an entry in the <var title="">form data set</var>
-       with the name <var title="">name<sub title="">x</sub></var> and
-       the value <var title="">x</var>.</p></li>
+       with the name <var title="">name<sub title="">x</sub></var>,
+       the value <var title="">x</var>, and the type <var
+       title="">type</var>.</p></li>
 
        <li><p>Append an entry in the <var title="">form data set</var>
        with the name <var title="">name<sub title="">y</sub></var> and
-       the value <var title="">y</var>.</p></li>
+       the value <var title="">y</var>, and the type <var
+       title="">type</var>.</p></li>
 
        <li><p>Skip the remaining substeps for this element: if there
        are any more elements in <var title="">controls</var>, return
@@ -31227,9 +31233,10 @@
      element in the <code>select</code> element whose <span
      title="concept-option-selectedness">selectedness</span> is true,
      append an entry in the <var title="">form data set</var> with the
-     <var title="">name</var> as the name and the <span
+     <var title="">name</var> as the name, the <span
      title="concept-option-value">value</span> of the
-     <code>option</code> element as the value.</p></li>
+     <code>option</code> element as the value, and <var
+     title="">type</var> as the type.</p></li>
 
      <li>
 
@@ -31249,8 +31256,9 @@
        "<code title="">on</code>".</p></li>
 
        <li><p>Append an entry in the <var title="">form data set</var>
-       with <var title="">name</var> as the name and <var
-       title="">value</var> as the value.</p></li>
+       with <var title="">name</var> as the name, <var
+       title="">value</var> as the value, and <var title="">type</var>
+       as the type.</p></li>
 
       </ol>
 
@@ -31264,13 +31272,15 @@
      title="concept-input-type-file-selected">selected</span> in the
      <code>input</code> element, append an entry in the <var
      title="">form data set</var> with the <var title="">name</var> as
-     the name and the file (consisting of the name, the type, and the
-     body) as the value.</p></li>
+     the name, the file (consisting of the name, the type, and the
+     body) as the value, and <var title="">type</var> as the
+     type.</p></li>
 
      <li><p>Otherwise, append an entry in the <var title="">form data
-     set</var> with <var title="">name</var> as the name and the <span
+     set</var> with <var title="">name</var> as the name, the <span
      title="concept-fe-value">value</span> of the <var
-     title="">field</var> element as the value.</p></li>
+     title="">field</var> element as the value, and <var
+     title="">type</var> as the type.</p></li>
 
     </ol>
 
@@ -31374,10 +31384,10 @@
      <dt><dfn title="submit-mutate-action">Mutate action</dfn>
      <dd>
 
-      <p>Let <var title="">query</var> be the resulting encoding the
+      <p>Let <var title="">query</var> be the result of encoding the
       <var title="">form data set</var> using the <span><code
       title="">application/x-www-form-urlencoded</code> encoding
-      algorithm</span>.</p>
+      algorithm</span>, interpreted as a US-ASCII string.</p>
 
       <p>Let <var title="">destination</var> be a new <span>URL</span>
       that is equal to the <var title="">action</var> except that its
@@ -31406,13 +31416,35 @@
       <p>Let <var title="">target browsing context</var> be <span>the
       form submission target browsing context</span>.</p>
 
+      <p>Let <var title="">MIME type</var> be determined as
+      follows:</p>
+
+      <dl>
+
+       <dt>If <var title="">enctype</var> is <code title="attr-fs-enctype-urlencoded">application/x-www-form-urlencoded</code></dt>
+
+       <dd>Let <var title="">MIME type</var> be "<code
+       title="">application/x-www-form-urlencoded</code>".</dd>
+
+       <dt>If <var title="">enctype</var> is <code title="attr-fs-enctype-formdata">multipart/form-data</code></dt>
+
+       <dd>Let <var title="">MIME type</var> be "<code
+       title="">multipart/form-data</code>".</dd>
+
+       <dt>If <var title="">enctype</var> is <code title="attr-fs-enctype-text">text/plain</code></dt>
+
+       <dd>Let <var title="">MIME type</var> be "<code title="">text/plain</code>".</dd>
+
+      </dl>
+
       <p><span>Navigate</span> <var title="">target browsing
       context</var> to <var title="">action</var> using the HTTP
       method given by <var title="">method</var> and with <var
-      title="">entity body</var> as the entity body. If <var
-      title="">target browsing context</var> was newly created for
-      this purpose by the steps above, then it must be navigated with
-      <span>replacement enabled</span>.</p>
+      title="">entity body</var> as the entity body, of type <var
+      title="">MIME type</var>. If <var title="">target browsing
+      context</var> was newly created for this purpose by the steps
+      above, then it must be navigated with <span>replacement
+      enabled</span>.</p>
 
      </dd>
 
@@ -31453,21 +31485,25 @@
 
       <p>If <var title="">action</var> contains the string "<code
       title="">%%%%</code>" (four U+0025 PERCENT SIGN characters),
-      then %-escape all characters in <var title="">data</var> that do
-      not match the <code title="">unreserved</code> production in the
-      URI Generic Syntax, and then further %-escape all the U+0025
-      PERCENT SIGN characters in the resulting string, and replace the
-      first occurance of "<code title="">%%%%</code>" in <var
-      title="">action</var> with the resulting double-escaped
-      string. <a href="#refsRFC3986">[RFC3986]</a></p>
+      then %-escape all bytes in <var title="">data</var> that, if
+      interpreted as US-ASCII, do not match the <code
+      title="">unreserved</code> production in the URI Generic Syntax,
+      and then, treating the result as a US-ASCII string, further
+      %-escape all the U+0025 PERCENT SIGN characters in the resulting
+      string and replace the first occurrence of "<code
+      title="">%%%%</code>" in <var title="">action</var> with the
+      resulting double-escaped string. <a
+      href="#refsRFC3986">[RFC3986]</a></p>
 
       <p>Otherwise, if <var title="">action</var> contains the string
       "<code title="">%%</code>" (two U+0025 PERCENT SIGN characters
       in a row, but not four), then %-escape all characters in <var
-      title="">data</var> that do not match the <code
-      title="">unreserved</code> production in the URI Generic Syntax,
-      and replace the first occurance of "<code title="">%%</code>" in
-      <var title="">action</var> with the resulting escaped string. <a
+      title="">data</var> that, if interpreted as US-ASCII, do not
+      match the <code title="">unreserved</code> production in the URI
+      Generic Syntax, and then, treating the result as a US-ASCII
+      string, replace the first occurrence of "<code
+      title="">%%</code>" in <var title="">action</var> with the
+      resulting escaped string. <a
       href="#refsRFC3986">[RFC3986]</a></p>
 
       <p>Let <var title="">target browsing context</var> be <span>the
@@ -31543,7 +31579,7 @@
       <p>Let <var title="">headers</var> be the resulting encoding the
       <var title="">form data set</var> using the <span><code
       title="">application/x-www-form-urlencoded</code> encoding
-      algorithm</span>.</p>
+      algorithm</span>, interpreted as a US-ASCII string.</p>
 
       <p>Replace occurances of U+002B PLUS SIGN characters (+) in <var
       title="">headers</var> with the string "<code
@@ -31577,10 +31613,10 @@
 
       <p>Let <var title="">body</var> be the resulting encoding the
       <var title="">form data set</var> using the <span>appropriate
-      form encoding algorithm</span> and then %-escaping all the
-      characters in the resulting string that do not match the <code
-      title="">unreserved</code> production in the URI Generic
-      Syntax. <a href="#refsRFC3986">[RFC3986]</a></p>
+      form encoding algorithm</span> and then %-escaping all the bytes
+      in the resulting byte string that, when interpreted as US-ASCII,
+      do not match the <code title="">unreserved</code> production in
+      the URI Generic Syntax. <a href="#refsRFC3986">[RFC3986]</a></p>
       
       <p>Let <var title="">destination</var> have the same value as
       <var title="">action</var>.</p>
@@ -31594,8 +31630,8 @@
       <p>Append the string "<code title="">body=</code>" to <var
       title="">destination</var>.</p>
 
-      <p>Append <var title="">body</var> to <var
-      title="">destination</var>.</p>
+      <p>Append <var title="">body</var>, interpreted as a US-ASCII
+      string, to <var title="">destination</var>.</p>
 
       <p>Let <var title="">target browsing context</var> be <span>the
       form submission target browsing context</span>.</p>
@@ -31658,31 +31694,131 @@
 
   <ol>
 
-   <li><p class="XXX">...</p></li>
+   <li><p>Let <var title="">result</var> be the empty string.</p></li>
 
-   <!-- During this step, the form data set is examined to ensure all
-   the characters are representable in the submission character
-   encoding. -->
+   <li>
 
+    <p>If the <code>form</code> element has an <code
+    title="attr-form-accept-charset">accept-charset</code> attribute,
+    then, taking into account the characters found in the <var
+    title="">form data set</var>'s names and values, and the character
+    encodings supported by the user agent, select a character encoding
+    from the list given in the <code>form</code>'s <code
+    title="attr-form-accept-charset">accept-charset</code> attribute
+    that is an <span>ASCII-compatible character encoding</span>. If
+    none of the encodings are supported, then let the selected
+    character encoding be UTF-8.</p>
+
+    <p>Otherwise, if the <span>document's character encoding</span> is
+    an <span>ASCII-compatible character encoding</span>, then that is
+    the selected character encoding.</p>
+
+    <p>Otherwise, let the selected character encoding be UTF-8.</p>
+
+   </li>
+
+   <li><p>Let <var title="">charset</var> be the preferred MIME name
+   of the selected character encoding.</p></li>
+
+   <li><p>For any entry whose name is "<code title="">_charset_</code>"
+   and whose type is "<code title="">hidden</code>", replace its value
+   with <var title="">charset</var>.</p></li>
+
+   <li><p>For any entry whose type is "<code title="">file</code>",
+   replace its value with the file's filename only.</p></li>
+
+   <li>
+
+    <p>For each entry in the <var title="">form data set</var>,
+    perform these substeps:</p>
+
+    <ol>
+
+     <li><p>For each character in the entry's name and value that
+     cannot be expressed using the selected character encoding,
+     replace the character by a string consisting of a U+0026
+     AMPERSAND character (&amp;), one or more characters in the range
+     U+0030 DIGIT ZERO (0) to U+0039 DIGIT NINE (9) representing the
+     Unicode codepoint of the character in base ten, and finally a
+     U+003B SEMICOLON character (;).</p></li>
+
+     <li>
+
+      <p>For each character in the entry's name and value, apply the
+      following subsubsteps:</p>
+
+      <ol>
+
+       <!-- * - . _ 0-9 a-z A-Z -->
+
+       <li><p>If the character isn't in the range U+0020, U+002A,
+       U+002D, U+002E, U+0030 .. U+0039, U+0041 .. U+005A, U+005F,
+       U+0061 .. U+007A then replace the character with a string
+       formed as follows: Start with the empty string, and then,
+       taking each byte of the character when expressed in the
+       selected character encoding in turn, append to the string a
+       U+0025 PERCENT SIGN character (%) followed by two characters in
+       the ranges U+0030 DIGIT ZERO (0) to U+0039 DIGIT NINE (9) and
+       U+0041 LATIN CAPITAL LETTER A to U+0046 LATIN CAPITAL LETTER F
+       representing the hexadecimal value of the byte (zero-padded if
+       necessary).</p></li>
+
+       <li><p>If the character is a U+0020 SPACE character, replace it
+       with a single U+002B PLUS SIGN character (+).</p></li>
+
+      </ol>
+
+     </li>
+
+     <li><p>If the entry's name is "<code title="">isindex</code>",
+     its type is "<code title="">text</code>", and this is the first
+     entry in the <var title="">form data set</var>, then append the
+     value to <var title="">result</var> and skip the rest of the
+     substeps for this entry, moving on to the next entry, if any, or
+     the next step in the overall algorithm otherwise.</p></li>
+
+     <li><p>If this is not the first entry, append a single U+0026
+     AMPERSAND character (&amp;) to <var
+     title="">result</var>.</p></li>
+
+     <li><p>Append the entry's name to <var
+     title="">result</var>.</p></li>
+
+     <li><p>Append a single U+003D EQUALS SIGN character (=) to <var
+     title="">result</var>.</p></li>
+
+     <li><p>Append the entry's value to <var
+     title="">result</var>.</p></li>
+
+    </ol>
+
+   </li>
+
+   <li><p>Encode <var title="">result</var> as US-ASCII and return the
+   resulting byte stream.</p></li>
+
   </ol>
 
 
   <h5>Multipart form data</h5>
 
   <p>The <dfn><code title="">multipart/form-data</code> encoding
-  algorithm</dfn> is as follows:</p>
+  algorithm</dfn> is to encode the <var title="">form data set</var>
+  using the rules described by RFC2388, <cite>Returning Values from
+  Forms: <code title="">multipart/form-data</code></cite>, and return
+  the resulting byte stream. <a href="#refsRFC2388">[RFC2388]</a></p>
 
-  <ol>
+  <p>Each entry in the <var title="">form data set</var> is a
+  <i>field</i>, the name of the entry is the <i>field name</i> and the
+  value of the entry is the <i>field value</i>.</p>
 
-   <li><p class="XXX">...</p></li>
+  <p>The order of parts must be the same as the order of fields in the
+  <var title="">form data set</var>. Multiple entries with the same
+  name must be treated as distinct fields.</p>
 
-   <!-- During this step, the form data set is examined to ensure all
-   the characters are representable in the submission character
-   encoding. -->
+  <!-- XXX define default encoding? -->
 
-  </ol>
 
-
   <h5>Plain text form data</h5>
 
   <p>The <dfn><code title="">text/plain</code> encoding
@@ -31690,12 +31826,64 @@
 
   <ol>
 
-   <li><p class="XXX">...</p></li>
+   <li><p>Let <var title="">result</var> be the empty string.</p></li>
 
-   <!-- During this step, the form data set is examined to ensure all
-   the characters are representable in the submission character
-   encoding. -->
+   <li>
 
+    <!-- this is different from application/x-www-form-urlencoded in
+    that it isn't limited to ASCII-compatible encodings -->
+
+    <p>If the <code>form</code> element has an <code
+    title="attr-form-accept-charset">accept-charset</code> attribute,
+    then, taking into account the characters found in the <var
+    title="">form data set</var>'s names and values, and the character
+    encodings supported by the user agent, select a character encoding
+    from the list given in the <code>form</code>'s <code
+    title="attr-form-accept-charset">accept-charset</code>
+    attribute. If none of the encodings are supported, then let the
+    selected character encoding be UTF-8.</p>
+
+    <p>Otherwise, the selected character encoding is the
+    <span>document's character encoding</span>.</p>
+
+   </li>
+
+   <li><p>Let <var title="">charset</var> be the preferred MIME name
+   of the selected character encoding.</p></li>
+
+   <li><p>For any entry whose name is "<code title="">_charset_</code>"
+   and whose type is "<code title="">hidden</code>", replace its value
+   with <var title="">charset</var>.</p></li>
+
+   <li><p>For any entry whose type is "<code title="">file</code>",
+   replace its value with the file's filename only.</p></li>
+
+   <li>
+
+    <p>For each entry in the <var title="">form data set</var>,
+    perform these substeps:</p>
+
+    <ol>
+
+     <li><p>Append the entry's name to <var
+     title="">result</var>.</p></li>
+
+     <li><p>Append a single U+003D EQUALS SIGN character (=) to <var
+     title="">result</var>.</p></li>
+
+     <li><p>Append the entry's value to <var
+     title="">result</var>.</p></li>
+
+     <li><p>Append a U+000D CARRIAGE RETURN (CR) U+000A LINE FEED (LF)
+     character pair to <var title="">result</var>.</p></li>
+
+    </ol>
+
+   </li>
+
+   <li><p>Encode <var title="">result</var> using the selected
+   character encoding and return the resulting byte stream.</p></li>
+
   </ol>
 
 
@@ -41741,8 +41929,8 @@
   as an argument.</p>
 
   <pre class="idl">interface <dfn>SQLResultSet</dfn> {
-  readonly attribute int <span title="dom-SQLResultSet-insertId">insertId</span>;
-  readonly attribute int <span title="dom-SQLResultSet-rowsAffected">rowsAffected</span>;
+  readonly attribute long <span title="dom-SQLResultSet-insertId">insertId</span>;
+  readonly attribute long <span title="dom-SQLResultSet-rowsAffected">rowsAffected</span>;
   readonly attribute <span>SQLResultSetRowList</span> <span title="dom-SQLResultSet-rows">rows</span>;
 };</pre>
 
@@ -41800,7 +41988,7 @@
   a <code>SQLError</code> object as one of their arguments.</p>
 
   <pre class="idl">interface <dfn>SQLError</dfn> {
-  readonly attribute unsigned int <span title="dom-SQLError-code">code</span>;
+  readonly attribute unsigned long <span title="dom-SQLError-code">code</span>;
   readonly attribute DOMString <span title="dom-SQLError-message">message</span>;
 };</pre>
 
@@ -47072,7 +47260,7 @@
   const unsigned short <span title="dom-WebSocket-CONNECTING">CONNECTING</span> = 0;
   const unsigned short <span title="dom-WebSocket-OPEN">OPEN</span> = 1;
   const unsigned short <span title="dom-WebSocket-CLOSED">CLOSED</span> = 2;
-  readonly attribute int <span title="dom-WebSocket-readyState">readyState</span>;
+  readonly attribute long <span title="dom-WebSocket-readyState">readyState</span>;
 
   // networking
            attribute EventListener <span title="handler-WebSocket-onopen">onopen</span>;
@@ -54314,11 +54502,6 @@
     keywords here: (input field)" in the user's preferred
     language.</p>
 
-    <p class="XXX"> Then need to specify that if the form
-    submission causes just a single form control, whose name is
-    "isindex", to be submitted, then we submit just the value part,
-    not the "isindex=" part.  </p>
-
    </dd>
 
 <!-- XXX keygen support; don't forget form element pointer!
@@ -57334,7 +57517,6 @@
        ("<code title="">foo</code>" vs <code>foo</code>)
  XXX * need to properly xref events throughout, mark up DOMActivate, etc
  XXX * onclick="" only fires if it is a MouseEvent ?
- XXX * <isindex> needs some prose in the form submission section
  XXX * hsivonen makes the following suggestions:
        > To make document conformance a more useful concept for the purpose of catching
        > author errors, I suggest that the following attributes be made required:

|