<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="ko">
	<id>https://wiki.mathnt.net/index.php?action=history&amp;feed=atom&amp;title=UTF-8</id>
	<title>UTF-8 - 편집 역사</title>
	<link rel="self" type="application/atom+xml" href="https://wiki.mathnt.net/index.php?action=history&amp;feed=atom&amp;title=UTF-8"/>
	<link rel="alternate" type="text/html" href="https://wiki.mathnt.net/index.php?title=UTF-8&amp;action=history"/>
	<updated>2026-04-05T02:44:00Z</updated>
	<subtitle>이 문서의 편집 역사</subtitle>
	<generator>MediaWiki 1.35.0</generator>
	<entry>
		<id>https://wiki.mathnt.net/index.php?title=UTF-8&amp;diff=51406&amp;oldid=prev</id>
		<title>2021년 2월 17일 (수) 08:28에 Pythagoras0님의 편집</title>
		<link rel="alternate" type="text/html" href="https://wiki.mathnt.net/index.php?title=UTF-8&amp;diff=51406&amp;oldid=prev"/>
		<updated>2021-02-17T08:28:46Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left diff-editfont-monospace&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;ko&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;← 이전 판&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;2021년 2월 17일 (수) 08:28 판&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l118&quot; &gt;118번째 줄:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;118번째 줄:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;  &amp;lt;references /&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;  &amp;lt;references /&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt;−&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;== 메타데이터 ==&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;==메타데이터==&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt;−&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt; &lt;/div&gt;&lt;/td&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;===위키데이터===&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;===위키데이터===&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* ID :  [https://www.wikidata.org/wiki/Q193537 Q193537]&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* ID :  [https://www.wikidata.org/wiki/Q193537 Q193537]&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;===Spacy 패턴 목록===&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;* [{&amp;#039;LEMMA&amp;#039;: &amp;#039;UTF-8&amp;#039;}]&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;* [{&amp;#039;LOWER&amp;#039;: &amp;#039;filesystem&amp;#039;}, {&amp;#039;LOWER&amp;#039;: &amp;#039;safe&amp;#039;}, {&amp;#039;LEMMA&amp;#039;: &amp;#039;utf&amp;#039;}]&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;* [{&amp;#039;LOWER&amp;#039;: &amp;#039;fss&amp;#039;}, {&amp;#039;OP&amp;#039;: &amp;#039;*&amp;#039;}, {&amp;#039;LEMMA&amp;#039;: &amp;#039;UTF&amp;#039;}]&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;* [{&amp;#039;LEMMA&amp;#039;: &amp;#039;UTF-2&amp;#039;}]&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;* [{&amp;#039;LOWER&amp;#039;: &amp;#039;utf&amp;#039;}, {&amp;#039;LEMMA&amp;#039;: &amp;#039;8u&amp;#039;}]&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;* [{&amp;#039;LEMMA&amp;#039;: &amp;#039;utf8&amp;#039;}]&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;* [{&amp;#039;LOWER&amp;#039;: &amp;#039;8-bit&amp;#039;}, {&amp;#039;LOWER&amp;#039;: &amp;#039;unicode&amp;#039;}, {&amp;#039;LOWER&amp;#039;: &amp;#039;transformation&amp;#039;}, {&amp;#039;LEMMA&amp;#039;: &amp;#039;format&amp;#039;}]&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;* [{&amp;#039;LOWER&amp;#039;: &amp;#039;unicode&amp;#039;}, {&amp;#039;LOWER&amp;#039;: &amp;#039;transformation&amp;#039;}, {&amp;#039;LOWER&amp;#039;: &amp;#039;format&amp;#039;}, {&amp;#039;OP&amp;#039;: &amp;#039;*&amp;#039;}, {&amp;#039;LEMMA&amp;#039;: &amp;#039;8-bit&amp;#039;}]&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;* [{&amp;#039;LOWER&amp;#039;: &amp;#039;unicode&amp;#039;}, {&amp;#039;LOWER&amp;#039;: &amp;#039;transformation&amp;#039;}, {&amp;#039;LOWER&amp;#039;: &amp;#039;format&amp;#039;}, {&amp;#039;OP&amp;#039;: &amp;#039;*&amp;#039;}, {&amp;#039;LEMMA&amp;#039;: &amp;#039;8-bit&amp;#039;}]&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Pythagoras0</name></author>
	</entry>
	<entry>
		<id>https://wiki.mathnt.net/index.php?title=UTF-8&amp;diff=47203&amp;oldid=prev</id>
		<title>Pythagoras0: /* 메타데이터 */ 새 문단</title>
		<link rel="alternate" type="text/html" href="https://wiki.mathnt.net/index.php?title=UTF-8&amp;diff=47203&amp;oldid=prev"/>
		<updated>2020-12-26T12:27:33Z</updated>

		<summary type="html">&lt;p&gt;&lt;span dir=&quot;auto&quot;&gt;&lt;span class=&quot;autocomment&quot;&gt;메타데이터: &lt;/span&gt; 새 문단&lt;/span&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left diff-editfont-monospace&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;ko&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;← 이전 판&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;2020년 12월 26일 (토) 12:27 판&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l117&quot; &gt;117번째 줄:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;117번째 줄:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;===소스===&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;===소스===&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;  &amp;lt;references /&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;  &amp;lt;references /&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;== 메타데이터 ==&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;===위키데이터===&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;* ID :  [https://www.wikidata.org/wiki/Q193537 Q193537]&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Pythagoras0</name></author>
	</entry>
	<entry>
		<id>https://wiki.mathnt.net/index.php?title=UTF-8&amp;diff=46107&amp;oldid=prev</id>
		<title>Pythagoras0: /* 노트 */ 새 문단</title>
		<link rel="alternate" type="text/html" href="https://wiki.mathnt.net/index.php?title=UTF-8&amp;diff=46107&amp;oldid=prev"/>
		<updated>2020-12-21T05:01:17Z</updated>

		<summary type="html">&lt;p&gt;&lt;span dir=&quot;auto&quot;&gt;&lt;span class=&quot;autocomment&quot;&gt;노트: &lt;/span&gt; 새 문단&lt;/span&gt;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;새 문서&lt;/b&gt;&lt;/p&gt;&lt;div&gt;== 노트 ==&lt;br /&gt;
&lt;br /&gt;
===위키데이터===&lt;br /&gt;
* ID :  [https://www.wikidata.org/wiki/Q193537 Q193537]&lt;br /&gt;
===말뭉치===&lt;br /&gt;
# The first 128 UTF-8 characters precisely match the first 128 ASCII characters (numbered 0-127), meaning that existing ASCII text is already valid UTF-8.&amp;lt;ref name=&amp;quot;ref_31c05569&amp;quot;&amp;gt;[https://developer.mozilla.org/en-US/docs/Glossary/UTF-8 UTF-8 - MDN Web Docs Glossary: Definitions of Web-related terms]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# A character in UTF8 can be from 1 to 4 bytes long.&amp;lt;ref name=&amp;quot;ref_53572a62&amp;quot;&amp;gt;[https://www.w3schools.com/charsets/ref_html_utf8.asp HTML UTF-8 Reference]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# UTF-8 can represent any character in the Unicode standard.&amp;lt;ref name=&amp;quot;ref_53572a62&amp;quot; /&amp;gt;&lt;br /&gt;
# HTML 4 supports UTF-8.&amp;lt;ref name=&amp;quot;ref_53572a62&amp;quot; /&amp;gt;&lt;br /&gt;
# One of the really nice features of UTF-8 is that it is compatible with nul-terminated strings.&amp;lt;ref name=&amp;quot;ref_18dd92dc&amp;quot;&amp;gt;[https://www.fileformat.info/info/unicode/utf8.htm UTF-8 Encoding]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# For characters equal to or below 2047 (hex 0x07FF), the UTF-8 representation is spread across two bytes.&amp;lt;ref name=&amp;quot;ref_18dd92dc&amp;quot; /&amp;gt;&lt;br /&gt;
# UTF-8 remains a simple, single-byte, ASCII-compatible encoding method, as long as no characters greater than 127 are directly present.&amp;lt;ref name=&amp;quot;ref_18dd92dc&amp;quot; /&amp;gt;&lt;br /&gt;
# A: Yes, there are several possible representations of Unicode data, including UTF-8, UTF-16 and UTF-32.&amp;lt;ref name=&amp;quot;ref_396e3afb&amp;quot;&amp;gt;[https://unicode.org/faq/utf_bom.html UTF-8, UTF-16, UTF-32 &amp;amp; BOM]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# For example, in UTF-8 every byte of the form 110xxxxx 2 must be followed with a byte of the form 10xxxxxx 2 .&amp;lt;ref name=&amp;quot;ref_396e3afb&amp;quot; /&amp;gt;&lt;br /&gt;
# Latin-1. UTF-8 uses the bytes in the ASCII only for ASCII characters.&amp;lt;ref name=&amp;quot;ref_396e3afb&amp;quot; /&amp;gt;&lt;br /&gt;
# UTF-8 is the byte-oriented encoding form of Unicode.&amp;lt;ref name=&amp;quot;ref_396e3afb&amp;quot; /&amp;gt;&lt;br /&gt;
# In this post, I’ll explain the basics of one technology central to text on the web, UTF-8.&amp;lt;ref name=&amp;quot;ref_96563f48&amp;quot;&amp;gt;[https://blog.hubspot.com/website/what-is-utf-8 What is UTF-8 Encoding? A Guide for Non-Programmers]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# UTF-8 is an encoding system for Unicode.&amp;lt;ref name=&amp;quot;ref_96563f48&amp;quot; /&amp;gt;&lt;br /&gt;
# There are other encoding systems for Unicode besides UTF-8, but UTF-8 is unique because it represents characters in one-byte units.&amp;lt;ref name=&amp;quot;ref_96563f48&amp;quot; /&amp;gt;&lt;br /&gt;
# More specifically, UTF-8 converts a code point (which represents a single character in Unicode) into a set of one to four bytes.&amp;lt;ref name=&amp;quot;ref_96563f48&amp;quot; /&amp;gt;&lt;br /&gt;
# The originally proposed encodings of the UCS, however, were not compatible with many current applications and protocols, and this has led to the development of UTF-8, the object of this memo.&amp;lt;ref name=&amp;quot;ref_53db38df&amp;quot;&amp;gt;[https://tools.ietf.org/html/rfc3629 UTF-8, a transformation format of ISO 10646]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# All standard UCS encoding forms except UTF-8 have an encoding unit larger than one octet, making them hard to use in many current applications and protocols that assume 8 or even 7 bit characters.&amp;lt;ref name=&amp;quot;ref_53db38df&amp;quot; /&amp;gt;&lt;br /&gt;
# CESU-8 operates similarly to UTF-8 but encodes the UTF-16 code values (16-bit quantities) instead of the character number (code point).&amp;lt;ref name=&amp;quot;ref_53db38df&amp;quot; /&amp;gt;&lt;br /&gt;
# This leads to different results for character numbers above 0xFFFF; the CESU-8 encoding of those characters is NOT valid UTF-8.&amp;lt;ref name=&amp;quot;ref_53db38df&amp;quot; /&amp;gt;&lt;br /&gt;
# UTF-8 is a variable-width character encoding used for electronic communication.&amp;lt;ref name=&amp;quot;ref_0afa1de8&amp;quot;&amp;gt;[https://en.wikipedia.org/wiki/UTF-8 Wikipedia]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# The Use of the main encodings on the web from 2001 to 2012 as recorded by Google,with UTF-8 overtaking all others in 2008 and over 60% of the web in 2012.&amp;lt;ref name=&amp;quot;ref_0afa1de8&amp;quot; /&amp;gt;&lt;br /&gt;
# The World Wide Web Consortium recommends UTF-8 as the default encoding in XML and HTML (and not just using UTF-8, also stating it in metadata), &amp;quot;even when all characters are in the ASCII range ..&amp;lt;ref name=&amp;quot;ref_0afa1de8&amp;quot; /&amp;gt;&lt;br /&gt;
# In locales where UTF-8 is used alongside another encoding, the latter is typically more efficient for the associated language.&amp;lt;ref name=&amp;quot;ref_0afa1de8&amp;quot; /&amp;gt;&lt;br /&gt;
# UTF-8 and Unicode Unicode Transformation Format 8-bit is a variable-width encoding that can represent every character in the Unicode character set.&amp;lt;ref name=&amp;quot;ref_c659fafb&amp;quot;&amp;gt;[http://www.utf-8.com/ UTF-8 and Unicode Standards]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# UTF-8 encodes each Unicode character as a variable number of 1 to 4 octets, where the number of octets depends on the integer value assigned to the Unicode character.&amp;lt;ref name=&amp;quot;ref_c659fafb&amp;quot; /&amp;gt;&lt;br /&gt;
# The MIME character set attribute for UTF-8 is UTF-8 .&amp;lt;ref name=&amp;quot;ref_c659fafb&amp;quot; /&amp;gt;&lt;br /&gt;
# UTF-8 is a variable-width encoding that can represent every character in the Unicode character set.&amp;lt;ref name=&amp;quot;ref_451a2d11&amp;quot;&amp;gt;[https://www.toptal.com/php/a-utf-8-primer-for-php-and-mysql A Guide to UTF-8 Encoding in PHP and MySQL]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# UTF-8 encodes each character using one to four bytes.&amp;lt;ref name=&amp;quot;ref_451a2d11&amp;quot; /&amp;gt;&lt;br /&gt;
# A is U+0041, which in UTF-8 is simply encoded with the single byte 41.&amp;lt;ref name=&amp;quot;ref_451a2d11&amp;quot; /&amp;gt;&lt;br /&gt;
# In comparison, the Unicode hexidecimal code for the character is U+233B4, which in UTF-8 is encoded with the four bytes F0 A3 8E B4.&amp;lt;ref name=&amp;quot;ref_451a2d11&amp;quot; /&amp;gt;&lt;br /&gt;
# , how UTF-8 decoders handle various types of | corrupted or otherwise interesting UTF-8 sequences.&amp;lt;ref name=&amp;quot;ref_733f8cdf&amp;quot;&amp;gt;[https://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html UTF-8 test file]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# According to ISO 10646-1, sections R.7 and 2.3c, a device receiving | UTF-8 shall interpret a &amp;quot;malformed sequence in the same way that it | interprets a character that is outside the adopted subset&amp;quot;.&amp;lt;ref name=&amp;quot;ref_733f8cdf&amp;quot; /&amp;gt;&lt;br /&gt;
# This means | usually that the malformed UTF-8 sequence is replaced by a replacement | character (U+FFFD), which looks a bit like an inverted question mark, | or a similar symbol.&amp;lt;ref name=&amp;quot;ref_733f8cdf&amp;quot; /&amp;gt;&lt;br /&gt;
# It might be a good idea to visually distinguish a | malformed UTF-8 sequence from a correctly encoded Unicode character | that is just not available in the current font but otherwise fully | legal.&amp;lt;ref name=&amp;quot;ref_733f8cdf&amp;quot; /&amp;gt;&lt;br /&gt;
# The browser interprets those numbers as UTF-8, and internally converts them into Unicode code points.&amp;lt;ref name=&amp;quot;ref_801158f2&amp;quot;&amp;gt;[https://www.smashingmagazine.com/2012/06/all-about-unicode-utf8-character-sets/ Unicode, UTF8 &amp;amp; Character Sets: The Ultimate Guide — Smashing Magazine]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# If you display the page using the UTF-8 character set, you will see only 3 characters: HЯ⾀.&amp;lt;ref name=&amp;quot;ref_801158f2&amp;quot; /&amp;gt;&lt;br /&gt;
# UTF-8 is becoming the most popular international character set on the Internet, superseding the older single-byte character sets like ISO-8859-5.&amp;lt;ref name=&amp;quot;ref_801158f2&amp;quot; /&amp;gt;&lt;br /&gt;
# Perhaps the Ð looks familiar - it will sometimes show up if you try to view Russian UTF-8 documents.&amp;lt;ref name=&amp;quot;ref_801158f2&amp;quot; /&amp;gt;&lt;br /&gt;
# UTF-8 uses one byte to represent code points from 0-127.&amp;lt;ref name=&amp;quot;ref_a3ebcda7&amp;quot;&amp;gt;[https://www.twilio.com/docs/glossary/what-utf-8 What is UTF-8?]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# The first UTF-8 byte signals how many bytes will follow it.&amp;lt;ref name=&amp;quot;ref_a3ebcda7&amp;quot; /&amp;gt;&lt;br /&gt;
# UTF-8 pads the leading bits with three 0 s to fully “fill out” the remaining spaces.&amp;lt;ref name=&amp;quot;ref_a3ebcda7&amp;quot; /&amp;gt;&lt;br /&gt;
# The en_US.UTF-8 locale provides multiscript processing support by using UTF-8 as its codeset.&amp;lt;ref name=&amp;quot;ref_ac5097ae&amp;quot;&amp;gt;[https://docs.oracle.com/cd/E19683-01/806-6642/utf8-21349/index.html Chapter 5 Overview of UTF-8 Locale Support (International Language Environments Guide)]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# After you install the Japanese locale, you can use ATOK12 in all UTF-8 locales.&amp;lt;ref name=&amp;quot;ref_ac5097ae&amp;quot; /&amp;gt;&lt;br /&gt;
# If your message contains characters from a mixture of scripts, the default MIME charset is UTF-8 .&amp;lt;ref name=&amp;quot;ref_ac5097ae&amp;quot; /&amp;gt;&lt;br /&gt;
# Any 8-bit characters of UTF-8 are encoded with Quoted-Printable encoding.&amp;lt;ref name=&amp;quot;ref_ac5097ae&amp;quot; /&amp;gt;&lt;br /&gt;
# The &amp;quot;I can eat glass&amp;quot; phrase and initial translations (about 30 of them) were borrowed from Ethan Mollick&amp;#039;s I Can Eat Glass page (which disappeared on or about June 2004) and converted to UTF-8.&amp;lt;ref name=&amp;quot;ref_cdcdcf32&amp;quot;&amp;gt;[http://www.columbia.edu/~fdc/utf8/ UTF-8 Sampler]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# Kermit 95 displays UTF-8 and also allows keyboard entry of arbitrary Unicode BMP characters as 4 hex digits, as shown HERE.&amp;lt;ref name=&amp;quot;ref_cdcdcf32&amp;quot; /&amp;gt;&lt;br /&gt;
# EMACS 21.1 actually supports UTF-8; earlier versions don&amp;#039;t know about it and display the octal codes; either way is OK for this purpose.&amp;lt;ref name=&amp;quot;ref_cdcdcf32&amp;quot; /&amp;gt;&lt;br /&gt;
# UltraEdit / UEStudio provides support for Unicode (16-bit wide character, or UTF-16) and UTF-8 files.&amp;lt;ref name=&amp;quot;ref_31bf2c6d&amp;quot;&amp;gt;[https://www.ultraedit.com/wiki/Unicode_/_UTF-8_support Unicode / UTF-8 support]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# You can directly edit UTF-8 and UTF-16 files and convert them between ANSI and Unicode formats.&amp;lt;ref name=&amp;quot;ref_31bf2c6d&amp;quot; /&amp;gt;&lt;br /&gt;
# String occurrences &amp;quot;charset=utf-8&amp;quot; or &amp;quot;encoding=utf-8&amp;quot; in the file.&amp;lt;ref name=&amp;quot;ref_31bf2c6d&amp;quot; /&amp;gt;&lt;br /&gt;
# If the file is determined to be UTF-8, it will be treated as such and on open, it will be converted internally to Unicode (16-bit) for editing.&amp;lt;ref name=&amp;quot;ref_31bf2c6d&amp;quot; /&amp;gt;&lt;br /&gt;
# This report shows the usage statistics of UTF-8 as character encoding on the web.&amp;lt;ref name=&amp;quot;ref_7bb7471d&amp;quot;&amp;gt;[https://w3techs.com/technologies/details/en-utf8 Usage Statistics and Market Share of UTF-8 for Websites, December 2020]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# UTF-8 is a byte oriented encoding.&amp;lt;ref name=&amp;quot;ref_9575819d&amp;quot;&amp;gt;[https://docs.python.org/3/howto/unicode.html Unicode HOWTO — Python 3.9.1 documentation]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# UTF-8 is one of the most commonly used encodings, and Python often defaults to using it.&amp;lt;ref name=&amp;quot;ref_9575819d&amp;quot; /&amp;gt;&lt;br /&gt;
# If you don’t include such a comment, the default encoding used will be UTF-8 as already mentioned.&amp;lt;ref name=&amp;quot;ref_9575819d&amp;quot; /&amp;gt;&lt;br /&gt;
# Python supports writing source code in UTF-8 by default, but you can use almost any encoding if you declare the encoding being used.&amp;lt;ref name=&amp;quot;ref_9575819d&amp;quot; /&amp;gt;&lt;br /&gt;
# WebSEAL implements multi-locale support by internally maintaining and handling all data using UCS Transformation Format 8 byte (UTF-8) encoding .&amp;lt;ref name=&amp;quot;ref_2942fe98&amp;quot;&amp;gt;[https://publib.boulder.ibm.com/tividd/td/ITAME/SC32-1359-00/en_US/HTML/am51_webseal_guide24.htm Multi-locale support with UTF-8]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# WebSEAL handles data internally in UTF-8 regardless of the locale in which the WebSEAL process is running.&amp;lt;ref name=&amp;quot;ref_2942fe98&amp;quot; /&amp;gt;&lt;br /&gt;
# Note that most operating systems do not use UTF-8 by default.&amp;lt;ref name=&amp;quot;ref_2942fe98&amp;quot; /&amp;gt;&lt;br /&gt;
# Local code pages can by UTF-8 or not UTF-8.&amp;lt;ref name=&amp;quot;ref_2942fe98&amp;quot; /&amp;gt;&lt;br /&gt;
# The UTF-8 encoding of Unicode and UCS does not have these problems and is the common way in which Unicode is used on UNIX-style operating systems.&amp;lt;ref name=&amp;quot;ref_8793ed80&amp;quot;&amp;gt;[https://man7.org/linux/man-pages/man7/UTF-8.7.html Linux manual page]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# All possible 2^31 UCS codes can be encoded using UTF-8.&amp;lt;ref name=&amp;quot;ref_8793ed80&amp;quot; /&amp;gt;&lt;br /&gt;
# The bytes 0xc0, 0xc1, 0xfe, and 0xff are never used in the UTF-8 encoding.&amp;lt;ref name=&amp;quot;ref_8793ed80&amp;quot; /&amp;gt;&lt;br /&gt;
# * UTF-8 encoded UCS characters may be up to six bytes long, however the Unicode standard specifies no characters above 0x10ffff, so Unicode characters can be only up to four bytes long in UTF-8.&amp;lt;ref name=&amp;quot;ref_8793ed80&amp;quot; /&amp;gt;&lt;br /&gt;
# Rust’s propensity for exposing possible errors, strings being a more complicated data structure than many programmers give them credit for, and UTF-8.&amp;lt;ref name=&amp;quot;ref_68c3396a&amp;quot;&amp;gt;[https://doc.rust-lang.org/book/ch08-02-strings.html The Rust Programming Language]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# In Chapter 4, we talked about string slices, which are references to some UTF-8 encoded string data stored elsewhere.&amp;lt;ref name=&amp;quot;ref_68c3396a&amp;quot; /&amp;gt;&lt;br /&gt;
# a growable, mutable, owned, UTF-8 encoded string type.&amp;lt;ref name=&amp;quot;ref_68c3396a&amp;quot; /&amp;gt;&lt;br /&gt;
# Let’s look at some of our properly encoded UTF-8 example strings from Listing 8-14.&amp;lt;ref name=&amp;quot;ref_68c3396a&amp;quot; /&amp;gt;&lt;br /&gt;
# utf-8 , where utf is short for unicode transformation format, is a method of encoding unicode characters using one to four bytes per character.&amp;lt;ref name=&amp;quot;ref_217efd5e&amp;quot;&amp;gt;[https://wiki.tcl-lang.org/page/utf-8 utf-8]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# Internally, Tcl uses modified utf-8 encoding, which is the same as utf-8 except that the NUL character (\u0000) is encoded as the bytes 0xC0 0x80, which is not a valid utf-8 sequence.&amp;lt;ref name=&amp;quot;ref_217efd5e&amp;quot; /&amp;gt;&lt;br /&gt;
# This doesn&amp;#039;t play well with encoding convertto utf-8 though, as that will reencode each surrogate in the pair as a separate character.&amp;lt;ref name=&amp;quot;ref_217efd5e&amp;quot; /&amp;gt;&lt;br /&gt;
# Package utf8 implements functions and constants to support text encoded in UTF-8.&amp;lt;ref name=&amp;quot;ref_89fd9dac&amp;quot;&amp;gt;[https://golang.org/pkg/unicode/utf8/ The Go Programming Language]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# It includes functions to translate between runes and UTF-8 byte sequences.&amp;lt;ref name=&amp;quot;ref_89fd9dac&amp;quot; /&amp;gt;&lt;br /&gt;
# DecodeLastRune unpacks the last UTF-8 encoding in p and returns the rune and its width in bytes.&amp;lt;ref name=&amp;quot;ref_89fd9dac&amp;quot; /&amp;gt;&lt;br /&gt;
# An encoding is invalid if it is incorrect UTF-8, encodes a rune that is out of range, or is not the shortest possible UTF-8 encoding for the value.&amp;lt;ref name=&amp;quot;ref_89fd9dac&amp;quot; /&amp;gt;&lt;br /&gt;
# Create a UTF-8 encoding.&amp;lt;ref name=&amp;quot;ref_b5bdb900&amp;quot;&amp;gt;[https://docs.microsoft.com/en-us/dotnet/api/system.text.utf8encoding UTF8Encoding Class (System.Text)]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# Text Class Example Public Shared Sub Main() &amp;#039; Create a UTF-8 encoding.&amp;lt;ref name=&amp;quot;ref_b5bdb900&amp;quot; /&amp;gt;&lt;br /&gt;
# Create a UTF-8 encoding that supports a BOM.&amp;lt;ref name=&amp;quot;ref_b5bdb900&amp;quot; /&amp;gt;&lt;br /&gt;
# Text Class Example Public Shared Sub Main() &amp;#039; Create a UTF-8 encoding that supports a BOM.&amp;lt;ref name=&amp;quot;ref_b5bdb900&amp;quot; /&amp;gt;&lt;br /&gt;
# UTF-8 is a standard for representing Unicode numbers in computer files.&amp;lt;ref name=&amp;quot;ref_244d18c8&amp;quot;&amp;gt;[https://www.hesa.ac.uk/support/user-guides/xml-files/unicode Common Unicode and UTF-8 issues]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# Therefore it is good practice to prefix a UTF-8 file with three special bytes, called the Byte Order Mark header (BOM header).&amp;lt;ref name=&amp;quot;ref_244d18c8&amp;quot; /&amp;gt;&lt;br /&gt;
# The HESA data collection system always outputs its UTF-8 files with BOM headers.&amp;lt;ref name=&amp;quot;ref_244d18c8&amp;quot; /&amp;gt;&lt;br /&gt;
# It is strongly recommended that institutions use UTF-8 BOM headers in their submitted XML files.&amp;lt;ref name=&amp;quot;ref_244d18c8&amp;quot; /&amp;gt;&lt;br /&gt;
# So, the first UTF-8 byte is used for encoding ASCII, giving the character set full backwards compatibility with ASCII.&amp;lt;ref name=&amp;quot;ref_6bed847b&amp;quot;&amp;gt;[https://wiki.gentoo.org/wiki/UTF-8 Gentoo Wiki]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# UTF-8 means that ASCII and Latin characters are interchangeable with little increase in the size of the data, because only the first byte is used.&amp;lt;ref name=&amp;quot;ref_6bed847b&amp;quot; /&amp;gt;&lt;br /&gt;
# UTF-8 allows users to work in a standards-compliant and internationally accepted multilingual environment, with a comparatively low data redundancy.&amp;lt;ref name=&amp;quot;ref_6bed847b&amp;quot; /&amp;gt;&lt;br /&gt;
# Despite this, many people regard UTF-8 in online communication as abusive.&amp;lt;ref name=&amp;quot;ref_6bed847b&amp;quot; /&amp;gt;&lt;br /&gt;
# For more information, see Section 10.9.2, “The utf8mb3 Character Set (3-Byte UTF-8 Unicode Encoding)”.&amp;lt;ref name=&amp;quot;ref_892f07eb&amp;quot;&amp;gt;[https://dev.mysql.com/doc/refman/5.6/en/charset-unicode-utf8.html MySQL :: MySQL 5.6 Reference Manual :: 10.9.3 The utf8 Character Set (Alias for utf8mb3)]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# Converts a string encoded in ANSI to UTF-8 with a given code page.&amp;lt;ref name=&amp;quot;ref_87d6dd89&amp;quot;&amp;gt;[http://docwiki.embarcadero.com/RADStudio/Sydney/en/UTF-8_Conversion_Routines UTF-8 Conversion Routines]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# One benefit of UTF-8 is its ability to deal with languages that have 100s and 1000s of characters.&amp;lt;ref name=&amp;quot;ref_d4f04d65&amp;quot;&amp;gt;[https://docs.moodle.org/en/UTF-8 MoodleDocs]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# UTF-8 is a clever way of encoding Unicode text.&amp;lt;ref name=&amp;quot;ref_c5571263&amp;quot;&amp;gt;[https://www.johndcook.com/blog/2019/09/09/how-utf-8-works/ How UTF-8 Unicode encoding works]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# I’ve mentioned it a couple times lately, but I haven’t blogged about UTF-8 per se.&amp;lt;ref name=&amp;quot;ref_c5571263&amp;quot; /&amp;gt;&lt;br /&gt;
# UTF-8 is a way of encoding Unicode so that an ASCII text file encodes to itself.&amp;lt;ref name=&amp;quot;ref_c5571263&amp;quot; /&amp;gt;&lt;br /&gt;
# When software reading UTF-8 comes across a byte starting with 1, it counts how many 1’s follow before encountering a 0.&amp;lt;ref name=&amp;quot;ref_c5571263&amp;quot; /&amp;gt;&lt;br /&gt;
# The use utf8 pragma tells the Perl parser to allow UTF-8 in the program text in the current lexical scope.&amp;lt;ref name=&amp;quot;ref_0fa95d6e&amp;quot;&amp;gt;[https://perldoc.perl.org/utf8 Perl pragma to enable/disable UTF-8 (or UTF-EBCDIC) in source code]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# The no utf8 pragma tells Perl to switch back to treating the source text as literal bytes in the current lexical scope.&amp;lt;ref name=&amp;quot;ref_0fa95d6e&amp;quot; /&amp;gt;&lt;br /&gt;
# Do not use this pragma for anything else than telling Perl that your script is written in UTF-8.&amp;lt;ref name=&amp;quot;ref_0fa95d6e&amp;quot; /&amp;gt;&lt;br /&gt;
# Bytes in the source text that are not in the ASCII character set will be treated as being part of a literal UTF-8 sequence.&amp;lt;ref name=&amp;quot;ref_0fa95d6e&amp;quot; /&amp;gt;&lt;br /&gt;
# UTF-8 uses an 8-bit code unit, and UTF-16 uses a 16-bit code unit.&amp;lt;ref name=&amp;quot;ref_64c426a7&amp;quot;&amp;gt;[https://flaviocopes.com/unicode/ Introduction to Unicode and UTF-8]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# UTF-8 was designed to be backward compatible with ASCII.&amp;lt;ref name=&amp;quot;ref_64c426a7&amp;quot; /&amp;gt;&lt;br /&gt;
# UTF-16 is a variable length encoding system, like UTF-8, but uses 2 bytes (16 bits) as the minimum for any character representation.&amp;lt;ref name=&amp;quot;ref_64c426a7&amp;quot; /&amp;gt;&lt;br /&gt;
# However, early versions of the UTF-8 specification got some entries wrong (in some cases it permitted overlong characters).&amp;lt;ref name=&amp;quot;ref_f60445ec&amp;quot;&amp;gt;[https://capec.mitre.org/data/definitions/80.html CAPEC-80: Using UTF-8 Encoding to Bypass Validation Logic (Version 3.3)]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# UTF-8 encoders are supposed to use the &amp;quot;shortest possible&amp;quot; encoding, but naive decoders may accept encodings that are longer than necessary.&amp;lt;ref name=&amp;quot;ref_f60445ec&amp;quot; /&amp;gt;&lt;br /&gt;
# Techniques Try to use UTF-8 encoding of content in Scripts in order to bypass validation routines.&amp;lt;ref name=&amp;quot;ref_f60445ec&amp;quot; /&amp;gt;&lt;br /&gt;
# Try to use UTF-8 encoding of content in HTML in order to bypass validation routines.&amp;lt;ref name=&amp;quot;ref_f60445ec&amp;quot; /&amp;gt;&lt;br /&gt;
# Now known as “UTF-8”, FSS-UTF was essentially completed.&amp;lt;ref name=&amp;quot;ref_bf56866f&amp;quot;&amp;gt;[https://www.ionos.com/digitalguide/websites/website-creation/utf-8-encoding-global-digital-communication/ UTF-8: the network standard]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# The first argument is a character encoding name, like &amp;quot;UTF-8&amp;quot; or &amp;quot;ASCII&amp;quot; or &amp;quot;EUC-JP&amp;quot;.&amp;lt;ref name=&amp;quot;ref_8d7a4963&amp;quot;&amp;gt;[https://stackoverflow.com/questions/6162484/why-does-modern-perl-avoid-utf-8-by-default Why does modern Perl avoid UTF-8 by default?]&amp;lt;/ref&amp;gt;&lt;br /&gt;
# Database drivers can be flaky; if you use DBD::SQLite with just Perl, it will work out, but if some other tool has put text stored as some encoding other than UTF-8 in your database...&amp;lt;ref name=&amp;quot;ref_8d7a4963&amp;quot; /&amp;gt;&lt;br /&gt;
# Unless you say use utf8 at the top of each file, Perl will not assume that your source code is UTF-8.&amp;lt;ref name=&amp;quot;ref_8d7a4963&amp;quot; /&amp;gt;&lt;br /&gt;
# It prints the UTF-8 data with a poo at the end of each line.&amp;lt;ref name=&amp;quot;ref_8d7a4963&amp;quot; /&amp;gt;&lt;br /&gt;
===소스===&lt;br /&gt;
 &amp;lt;references /&amp;gt;&lt;/div&gt;</summary>
		<author><name>Pythagoras0</name></author>
	</entry>
</feed>