<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="ko">
	<id>http://samediff.kr/wiki/index.php?action=history&amp;feed=atom&amp;title=Fast_RCNN</id>
	<title>Fast RCNN - 편집 역사</title>
	<link rel="self" type="application/atom+xml" href="http://samediff.kr/wiki/index.php?action=history&amp;feed=atom&amp;title=Fast_RCNN"/>
	<link rel="alternate" type="text/html" href="http://samediff.kr/wiki/index.php?title=Fast_RCNN&amp;action=history"/>
	<updated>2026-04-27T13:33:54Z</updated>
	<subtitle>이 문서의 편집 역사</subtitle>
	<generator>MediaWiki 1.34.0</generator>
	<entry>
		<id>http://samediff.kr/wiki/index.php?title=Fast_RCNN&amp;diff=13776&amp;oldid=prev</id>
		<title>2017년 8월 6일 (일) 15:34에 Admin님의 편집</title>
		<link rel="alternate" type="text/html" href="http://samediff.kr/wiki/index.php?title=Fast_RCNN&amp;diff=13776&amp;oldid=prev"/>
		<updated>2017-08-06T15:34:04Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;ko&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← 이전 판&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;2017년 8월 6일 (일) 15:34 판&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l1&quot; &gt;1번째 줄:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;1번째 줄:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Ross Girshick  &amp;lt;ref&amp;gt;혼자 썼는데 본문의 모든 주어가 We. 관행인가봄. &amp;lt;br&amp;gt;&amp;lt;span class=gray&amp;gt;아니 그럼 [https://ko.wikipedia.org/wiki/체스터_윌러드 체스터 윌러드]는 뭐야.&amp;lt;/span&amp;gt;&amp;lt;/ref&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Ross Girshick  &amp;lt;ref&amp;gt;혼자 썼는데 본문의 모든 주어가 We. 관행인가봄. &amp;lt;br&amp;gt;&amp;lt;span class=gray&amp;gt;아니 그럼 [https://ko.wikipedia.org/wiki/체스터_윌러드 체스터 윌러드]는 뭐야.&amp;lt;/span&amp;gt;&amp;lt;/ref&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Microsoft Research&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;ref&amp;gt;지금은 페북에 있는듯&amp;lt;/ref&amp;gt;&lt;/del&gt;&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Microsoft Research&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;[https://github.com/rbgirshick/fast-rcnn github]&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;[https://github.com/rbgirshick/fast-rcnn github]&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Admin</name></author>
		
	</entry>
	<entry>
		<id>http://samediff.kr/wiki/index.php?title=Fast_RCNN&amp;diff=13775&amp;oldid=prev</id>
		<title>2017년 8월 6일 (일) 15:32에 Admin님의 편집</title>
		<link rel="alternate" type="text/html" href="http://samediff.kr/wiki/index.php?title=Fast_RCNN&amp;diff=13775&amp;oldid=prev"/>
		<updated>2017-08-06T15:32:04Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;ko&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← 이전 판&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;2017년 8월 6일 (일) 15:32 판&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l35&quot; &gt;35번째 줄:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;35번째 줄:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;pdf height=610&amp;gt;file:frcnn12.pdf&amp;lt;/pdf&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;pdf height=610&amp;gt;file:frcnn12.pdf&amp;lt;/pdf&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;제대로 &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;이해 했는지 모르겠다만&lt;/del&gt;&amp;lt;&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;ref&lt;/del&gt;&amp;gt;&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;원문은 다음과 같다.&lt;/del&gt;&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;① max pooling해서 나온 후보군의 경우 오직 max값을 가진 pixel만 넘어오게 되어 있으므로, ([[unpooling switch]]를 사용하여) 해당 pixel에 대해서만 gradient를 계산할 것이고, ② roi를 뽑는 과정에서 computation을 share하게 되어 있으므로 한 픽셀이 여러곳으로 들어갈 수 있는데, bp되는 모든 값을 sum하겠다는 뜻. &amp;lt;ref&amp;gt;&amp;lt;del&amp;gt;&lt;/ins&gt;제대로 &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;이해한거 맞는지 자신이 없어서 원문을…&lt;/ins&gt;&amp;lt;&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;/del&lt;/ins&gt;&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;poem&amp;gt;&amp;lt;blockquote&amp;gt;Let \(x_i ∈ \R\) be the \(i\)-th activation input into the RoI pooling layer and let \(y_{rj}\) be the layer’s \(j\)-th output from the \(r\)-th RoI. The RoI pooling layer computes \(y_{rj} = x_{i^{∗}(r,j)}\), in which \(i^{∗}(r, j) = \text{argmax}_{ i'∈\mathcal{R}(r,j)} x_{i'}\) . \(\mathcal{R}(r, j) \) is the index set of inputs in the sub-window over which the output unit \(y_{rj}\) max pools. A single \(x_i\) may be assigned to several different outputs \(y_{rj}\) .&amp;lt;br&amp;gt;(중략)&amp;lt;br&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;poem&amp;gt;&amp;lt;blockquote&amp;gt;Let \(x_i ∈ \R\) be the \(i\)-th activation input into the RoI pooling layer and let \(y_{rj}\) be the layer’s \(j\)-th output from the \(r\)-th RoI. The RoI pooling layer computes \(y_{rj} = x_{i^{∗}(r,j)}\), in which \(i^{∗}(r, j) = \text{argmax}_{ i'∈\mathcal{R}(r,j)} x_{i'}\) . \(\mathcal{R}(r, j) \) is the index set of inputs in the sub-window over which the output unit \(y_{rj}\) max pools. A single \(x_i\) may be assigned to several different outputs \(y_{rj}\) .&amp;lt;br&amp;gt;(중략)&amp;lt;br&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;In words, for each mini-batch RoI \(r\) and for each pooling output unit \(y_{rj}\), the partial derivative \(∂L/∂y_{rj}\) is accumulated if \(i\) is the argmax selected for \(y_{rj}\) by max pooling.&amp;lt;/blockquote&amp;gt;&amp;lt;/poem&amp;gt;&amp;lt;/ref&amp;gt;&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;, ① max pooling해서 나온 후보군의 경우 오직 max값을 가진 pixel만 넘어오게 되어 있으므로, ([[unpooling switch]]를 사용하여) 해당 pixel에 대해서만 gradient를 계산할 것이고, ② roi를 뽑는 과정에서 computation을 share하게 되어 있으므로 한 픽셀이 여러곳으로 들어갈 수 있는데, bp되는 모든 값을 sum하겠다는 뜻.&lt;/del&gt;&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;In words, for each mini-batch RoI \(r\) and for each pooling output unit \(y_{rj}\), the partial derivative \(∂L/∂y_{rj}\) is accumulated if \(i\) is the argmax selected for \(y_{rj}\) by max pooling.&amp;lt;/blockquote&amp;gt;&amp;lt;/poem&amp;gt;&amp;lt;/ref&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Scale invariance위해 두가지 방법을 해봄. 하나는 이미지 사이즈를 일정하게 고정하고 brutal force하게 RoI를 주는법, 하나는 이미지 피라미드에서 취하는 법. 첫번째 방법은 net이 모든 object size에 대해 학습해야 하고(대부분의 실험을 이렇게 함) 두번째 방법은 RoI가 거의 일정하게 유지된다. 실험할때는 \(224^2\) pixel에 최대한 가깝게 했다. 실험결과는 SPPnet과 일치하는데, multi-scale이 아주 약간 더 좋기는 하지만, 둘의 성능이 거의 동일하다. 따라서 다른 모든 실험은 single-scale로 이루어졌다.  &lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Scale invariance위해 두가지 방법을 해봄. 하나는 이미지 사이즈를 일정하게 고정하고 brutal force하게 RoI를 주는법, 하나는 이미지 피라미드에서 취하는 법. 첫번째 방법은 net이 모든 object size에 대해 학습해야 하고(대부분의 실험을 이렇게 함) 두번째 방법은 RoI가 거의 일정하게 유지된다. 실험할때는 \(224^2\) pixel에 최대한 가깝게 했다. 실험결과는 SPPnet과 일치하는데, multi-scale이 아주 약간 더 좋기는 하지만, 둘의 성능이 거의 동일하다. 따라서 다른 모든 실험은 single-scale로 이루어졌다.  &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Admin</name></author>
		
	</entry>
	<entry>
		<id>http://samediff.kr/wiki/index.php?title=Fast_RCNN&amp;diff=13774&amp;oldid=prev</id>
		<title>2017년 8월 6일 (일) 15:29에 Admin님의 편집</title>
		<link rel="alternate" type="text/html" href="http://samediff.kr/wiki/index.php?title=Fast_RCNN&amp;diff=13774&amp;oldid=prev"/>
		<updated>2017-08-06T15:29:35Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;ko&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← 이전 판&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;2017년 8월 6일 (일) 15:29 판&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l9&quot; &gt;9번째 줄:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;9번째 줄:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;아래 논문내용중 등장하는 슬라이드 조각조각은 저자가 직접 작성해서 공개&amp;lt;ref&amp;gt;http://www.robots.ox.ac.uk/~tvg/publications/talks/fast-rcnn-slides.pdf&amp;lt;/ref&amp;gt;한 것.&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;아래 논문내용중 등장하는 슬라이드 조각조각은 저자가 직접 작성해서 공개&amp;lt;ref&amp;gt;http://www.robots.ox.ac.uk/~tvg/publications/talks/fast-rcnn-slides.pdf&amp;lt;/ref&amp;gt;한 것.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;그냥 &lt;/del&gt;R-CNN&amp;lt;ref name=r9&amp;gt;R. Girshick, J. Donahue, T. Darrell, and J. Malik. Rich feature hierarchies for accurate object detection and semantic segmentation. In CVPR, 2014.&amp;lt;/ref&amp;gt; : R-CNN first finetunes a ConvNet on object proposals using log loss. Then, it fits SVMs to ConvNet features. These SVMs act as object detectors, replacing the softmax classifier learnt by fine-tuning. In the third training stage, bounding-box regressors are learned. … Detection with VGG16 takes 47s / image (on a Nvidia K40 GPU overclocked to 875 MHz.). &amp;lt;del&amp;gt;엄청나게 느림&amp;lt;/del&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;Slow &lt;/ins&gt;R-CNN&amp;lt;ref name=r9&amp;gt;R. Girshick, J. Donahue, T. Darrell, and J. Malik. Rich feature hierarchies for accurate object detection and semantic segmentation. In CVPR, 2014.&amp;lt;/ref&amp;gt; : R-CNN first finetunes a ConvNet on object proposals using log loss. Then, it fits SVMs to ConvNet features. These SVMs act as object detectors, replacing the softmax classifier learnt by fine-tuning. In the third training stage, bounding-box regressors are learned. … Detection with VGG16 takes 47s / image (on a Nvidia K40 GPU overclocked to 875 MHz.). &amp;lt;del&amp;gt;엄청나게 느림&amp;lt;/del&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;pdf height=610&amp;gt;file:Frcnn2.pdf&amp;lt;/pdf&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;pdf height=610&amp;gt;file:Frcnn2.pdf&amp;lt;/pdf&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Admin</name></author>
		
	</entry>
	<entry>
		<id>http://samediff.kr/wiki/index.php?title=Fast_RCNN&amp;diff=13773&amp;oldid=prev</id>
		<title>2017년 8월 6일 (일) 15:29에 Admin님의 편집</title>
		<link rel="alternate" type="text/html" href="http://samediff.kr/wiki/index.php?title=Fast_RCNN&amp;diff=13773&amp;oldid=prev"/>
		<updated>2017-08-06T15:29:18Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;ko&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← 이전 판&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;2017년 8월 6일 (일) 15:29 판&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l9&quot; &gt;9번째 줄:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;9번째 줄:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;아래 논문내용중 등장하는 슬라이드 조각조각은 저자가 직접 작성해서 공개&amp;lt;ref&amp;gt;http://www.robots.ox.ac.uk/~tvg/publications/talks/fast-rcnn-slides.pdf&amp;lt;/ref&amp;gt;한 것.&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;아래 논문내용중 등장하는 슬라이드 조각조각은 저자가 직접 작성해서 공개&amp;lt;ref&amp;gt;http://www.robots.ox.ac.uk/~tvg/publications/talks/fast-rcnn-slides.pdf&amp;lt;/ref&amp;gt;한 것.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;그냥 R-CNN&amp;lt;ref name=r9&amp;gt;R. Girshick, J. Donahue, T. Darrell, and J. Malik. Rich feature hierarchies for accurate object detection and semantic segmentation. In CVPR, 2014.&amp;lt;/ref&amp;gt;&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;은 이런가봄 &lt;/del&gt;: R-CNN first finetunes a ConvNet on object proposals using log loss. Then, it fits SVMs to ConvNet features. These SVMs act as object detectors, replacing the softmax classifier learnt by fine-tuning. In the third training stage, bounding-box regressors are learned. … Detection with VGG16 takes 47s / image (on a Nvidia K40 GPU overclocked to 875 MHz.). &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;이야 ~ &lt;/del&gt;&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;그냥 R-CNN&amp;lt;ref name=r9&amp;gt;R. Girshick, J. Donahue, T. Darrell, and J. Malik. Rich feature hierarchies for accurate object detection and semantic segmentation. In CVPR, 2014.&amp;lt;/ref&amp;gt; : R-CNN first finetunes a ConvNet on object proposals using log loss. Then, it fits SVMs to ConvNet features. These SVMs act as object detectors, replacing the softmax classifier learnt by fine-tuning. In the third training stage, bounding-box regressors are learned. … Detection with VGG16 takes 47s / image (on a Nvidia K40 GPU overclocked to 875 MHz.). &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;del&amp;gt;엄청나게 느림&amp;lt;/del&amp;gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;그냥 R-CNN은 object proposal마다 cnn forward하는데, SPPnets&lt;/del&gt;&amp;lt;&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;ref name&lt;/del&gt;=&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;r11&lt;/del&gt;&amp;gt;&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;K.He, X.Zhang, S.Ren, and J.Sun. Spatial pyramid pooling in deep convolutional networks for visual recognition. In ECCV,2014&lt;/del&gt;.&amp;lt;/&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;ref&lt;/del&gt;&amp;gt;&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;가 미리 cnn돌려놓고 거기서부터 feature뽑아내는 식으로 test time은 10~100배, training time도 3배정도 개선했다. 단, SPPnets는 R-CNN과 달리 spatial pyramid pooling앞의 convolutional layers를 update할 수 없다.&lt;/del&gt;&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;pdf height&lt;/ins&gt;=&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;610&lt;/ins&gt;&amp;gt;&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;file:Frcnn2&lt;/ins&gt;.&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;pdf&lt;/ins&gt;&amp;lt;/&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;pdf&lt;/ins&gt;&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;입력으로는 이미지와 &lt;/del&gt;object &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;proposals를 받는다&lt;/del&gt;. &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;먼저 이미지가 convnet지나면서 feature map을 만들고 이 feature map과 앞의 object proposal로부터 RoI &lt;/del&gt;pooling &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;layer가 일정한 길이의 feature vector들을 뽑아낸다&lt;/del&gt;. &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;이 feature vector들이 fc를 지나가면서 두가지 출력을 내는데 하나는 클래스정보(K object class + ‘background’의 softmax)&lt;/del&gt;, &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;다른 하나는 영역(refined bounding box by &lt;/del&gt;&amp;lt;&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;i&lt;/del&gt;&amp;gt;&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;category-specific&lt;/del&gt;&amp;lt;/&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;i&lt;/del&gt;&amp;gt; &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;bounding&lt;/del&gt;-&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;box regressors&lt;/del&gt;. &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;각 클래스마다 네개의 좌표)&lt;/del&gt;.  
&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;그냥 R-CNN은 &lt;/ins&gt;object &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;proposal마다 cnn forward하는데, SPPnets&amp;lt;ref name=r11&amp;gt;K.He, X.Zhang, S.Ren, and J.Sun&lt;/ins&gt;. &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;Spatial pyramid &lt;/ins&gt;pooling &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;in deep convolutional networks for visual recognition&lt;/ins&gt;. &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;In ECCV&lt;/ins&gt;,&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;2014.&lt;/ins&gt;&amp;lt;&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;br&amp;gt;&amp;lt;pdf height=610&amp;gt;file:frcnn4.pdf&amp;lt;/pdf&lt;/ins&gt;&amp;gt;&amp;lt;/&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;ref&lt;/ins&gt;&amp;gt;&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;가 미리 cnn돌려놓고 거기서부터 feature뽑아내는 식으로 test time은 10~100배, training time도 3배정도 개선했다. 단, SPPnets는 R&lt;/ins&gt;-&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;CNN과 달리 spatial pyramid pooling앞의 convolutional layers를 update할 수 없다&lt;/ins&gt;.&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;ref&amp;gt;pyramid pooling하기 때문임. &amp;lt;del&amp;gt;억지로 하려면 뭐 못할것도 없겠다만…&amp;lt;/del&amp;gt;&amp;lt;br&amp;gt;&amp;lt;pdf height=610&amp;gt;file:frcnn7&lt;/ins&gt;.&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;pdf&amp;lt;/pdf&amp;gt;&amp;lt;/ref&amp;gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;이미지 하나당 RoI하나씩 해서 학습하는 것(SPPnet과 R-CNN은 이렇게 한다)보다 같은 RoI개수라도 적은 이미지 수를 사용하면 학습이 빠르다. 이미지 하나당 cnn한번만 통과하면 RoI마다 feature를 얻어내기 때문이다(cnn결과를 share함). 보통 이미지 전체가 RoI로 잡히는 일이 많기 때문에 이렇게 하면 계산속도 이득이 크다. 이미지당 중복된 RoI를 뽑을 때 서로간 correlation이 문제될 수 있지만, 실제 실험(이미지 당 64개씩 RoI, batch size=2)결과 괜찮았다.&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;입력으로는 이미지와 object proposals를 받는다. 먼저 이미지가 convnet지나면서 feature map을 만들고 이 feature map과 앞의 object proposal로부터 RoI pooling layer가 일정한 길이의 feature vector들을 뽑아낸다. 이 feature vector들이 fc를 지나가면서 두가지 출력을 내는데 하나는 클래스정보(K object class + ‘background’의 softmax), 다른 하나는 영역(refined bounding box by &amp;lt;i&amp;gt;category-specific&amp;lt;/i&amp;gt; bounding-box regressors. 각 클래스마다 네개의 좌표). &amp;lt;ref&amp;gt;&amp;lt;pdf height=610&amp;gt;file:frcnn9.pdf&amp;lt;/pdf&amp;gt;&amp;lt;/ref&amp;gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt; &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;이미지 하나당 RoI하나씩 해서 학습하는 것(SPPnet과 R-CNN은 이렇게 한다)보다 같은 RoI개수라도 적은 이미지 수를 사용하면 학습이 빠르다. 이미지 하나당 cnn한번만 통과하면 RoI마다 feature를 얻어내기 때문이다(cnn결과를 share함). 보통 이미지 전체가 RoI로 잡히는 일이 많기 때문에 이렇게 하면 계산속도 이득이 크다. 이미지당 중복된 RoI를 뽑을 때 서로간 correlation이 문제될 수 있지만, 실제 실험(이미지 당 64개씩 RoI, batch size=2)결과 괜찮았다.&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;ref&amp;gt;&amp;lt;pdf height=610&amp;gt;file:frcnn10.pdf&amp;lt;/pdf&amp;gt;&amp;lt;/ref&amp;gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;loss는 다음을 쓴다.&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;loss는 다음을 쓴다.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l27&quot; &gt;27번째 줄:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;29번째 줄:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;학습할 때, RoI중 25%는 IoU(intersection over union) \(&amp;gt;0.5\)에서, 나머지는 IoU\(=[0.1, 0.5)\)에서 썼다. 구간 시작값 \(0.1\)은 heuristic하게 잡은 것이다. background는 IoU=\(0\)이다. 이미지는 \(0.5\)의 확률로 horizontally flip이고 이 외에 augmentation은 하지 않았다.&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;학습할 때, RoI중 25%는 IoU(intersection over union) \(&amp;gt;0.5\)에서, 나머지는 IoU\(=[0.1, 0.5)\)에서 썼다. 구간 시작값 \(0.1\)은 heuristic하게 잡은 것이다. background는 IoU=\(0\)이다. 이미지는 \(0.5\)의 확률로 horizontally flip이고 이 외에 augmentation은 하지 않았다.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;RoI pooling layer에서 back propagation은 다음과 &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;같은데,&lt;/del&gt;&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;RoI pooling layer에서 back propagation은 다음과 &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;같다.&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;$$\frac{\partial L}{\partial x_i} = \sum_r \sum_j [ i = i^* (r,j)] \frac{\partial L}{\partial y_{rj}} $$&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;$$\frac{\partial L}{\partial x_i} = \sum_r \sum_j [ i = i^* (r,j)] \frac{\partial L}{\partial y_{rj}} $$&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;대강 이해하기로는, one batch안에서 (각 이미지당) \(x_i\)가 최대일 때만 loss계산해서 bp하겠다는 말 같은데, ‘\(x_i\)가 최대’라는게 무슨 뜻인지 모르겠다&lt;/del&gt;. &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;그냥 norm인가&lt;/del&gt;.&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;그림을 보면 더 감이 잘 온다&lt;/ins&gt;.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt; &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;pdf height=610&amp;gt;file:frcnn12&lt;/ins&gt;.&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;pdf&amp;lt;/pdf&amp;gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;원문은 다음과 같다.&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;제대로 이해 했는지 모르겠다만&amp;lt;ref&amp;gt;&lt;/ins&gt;원문은 다음과 같다.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;blockquote&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;poem&amp;gt;&lt;/ins&gt;&amp;lt;blockquote&amp;gt;Let \(x_i ∈ \R\) be the \(i\)-th activation input into the RoI pooling layer and let \(y_{rj}\) be the layer’s \(j\)-th output from the \(r\)-th RoI. The RoI pooling layer computes \(y_{rj} = x_{i^{∗}(r,j)}\), in which \(i^{∗}(r, j) = \text{argmax}_{ i'∈\mathcal{R}(r,j)} x_{i'}\) . \(\mathcal{R}(r, j) \) is the index set of inputs in the sub-window over which the output unit \(y_{rj}\) max pools. A single \(x_i\) may be assigned to several different outputs \(y_{rj}\) .&amp;lt;br&amp;gt;(중략)&amp;lt;br&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Let \(x_i ∈ \R\) be the \(i\)-th activation input into the RoI pooling layer and let \(y_{rj}\) be the layer’s \(j\)-th output from the \(r\)-th RoI. The RoI pooling layer computes \(y_{rj} = x_{i^{∗}(r,j)}\), in which \(i^{∗}(r, j) = \text{argmax}_{ i'∈\mathcal{R}(r,j)} x_{i'}\) . \(\mathcal{R}(r, j) \) is the index set of inputs in the sub-window over which the output unit \(y_{rj}\) max pools. A single \(x_i\) may be assigned to several different outputs \(y_{rj}\) .&amp;lt;br&amp;gt;(중략)&amp;lt;br&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;In words, for each mini-batch RoI \(r\) and for each pooling output unit \(y_{rj}\), the partial derivative \(∂L/∂y_{rj}\) is accumulated if \(i\) is the argmax selected for \(y_{rj}\) by max pooling.&amp;lt;/blockquote&amp;gt;&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;/poem&amp;gt;&amp;lt;/ref&amp;gt;, ① max pooling해서 나온 후보군의 경우 오직 max값을 가진 pixel만 넘어오게 되어 있으므로, ([[unpooling switch]]를 사용하여) 해당 pixel에 대해서만 gradient를 계산할 것이고, ② roi를 뽑는 과정에서 computation을 share하게 되어 있으므로 한 픽셀이 여러곳으로 들어갈 수 있는데, bp되는 모든 값을 sum하겠다는 뜻.&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;In words, for each mini-batch RoI \(r\) and for each pooling output unit \(y_{rj}\), the partial derivative \(∂L/∂y_{rj}\) is accumulated if \(i\) is the argmax selected for \(y_{rj}\) by max pooling.&lt;/div&gt;&lt;/td&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;/blockquote&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Scale invariance위해 두가지 방법을 해봄. 하나는 이미지 사이즈를 일정하게 고정하고 brute force하게 RoI를 주는법, 하나는 이미지 피라미드에서 취하는 법. 첫번째 방법은 net이 모든 object size에 대해 학습해야 하고(대부분의 실험을 이렇게 함) 두번째 방법은 RoI가 거의 일정하게 유지된다. 실험할때는 \(224^2\) pixel에 최대한 가깝게 했다. 실험결과는 SPPnet과 일치하는데, multi-scale이 아주 약간 더 좋기는 하지만, 둘의 성능이 거의 동일하다. 따라서 다른 모든 실험은 single-scale로 이루어졌다.  &lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Scale invariance위해 두가지 방법을 해봄. 하나는 이미지 사이즈를 일정하게 고정하고 brute force하게 RoI를 주는법, 하나는 이미지 피라미드에서 취하는 법. 첫번째 방법은 net이 모든 object size에 대해 학습해야 하고(대부분의 실험을 이렇게 함) 두번째 방법은 RoI가 거의 일정하게 유지된다. 실험할때는 \(224^2\) pixel에 최대한 가깝게 했다. 실험결과는 SPPnet과 일치하는데, multi-scale이 아주 약간 더 좋기는 하지만, 둘의 성능이 거의 동일하다. 따라서 다른 모든 실험은 single-scale로 이루어졌다.  &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Admin</name></author>
		
	</entry>
	<entry>
		<id>http://samediff.kr/wiki/index.php?title=Fast_RCNN&amp;diff=13766&amp;oldid=prev</id>
		<title>2017년 8월 6일 (일) 14:58에 Admin님의 편집</title>
		<link rel="alternate" type="text/html" href="http://samediff.kr/wiki/index.php?title=Fast_RCNN&amp;diff=13766&amp;oldid=prev"/>
		<updated>2017-08-06T14:58:14Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;ko&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← 이전 판&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;2017년 8월 6일 (일) 14:58 판&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l1&quot; &gt;1번째 줄:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;1번째 줄:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Ross Girshick  &amp;lt;ref&amp;gt;혼자 썼는데 본문의 모든 주어가 We. 관행인가봄. &amp;lt;br&amp;gt;&amp;lt;span class=gray&amp;gt;아니 그럼 [https://ko.wikipedia.org/wiki/체스터_윌러드 체스터 윌러드]는 뭐야.&amp;lt;/span&amp;gt;&amp;lt;/ref&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Ross Girshick  &amp;lt;ref&amp;gt;혼자 썼는데 본문의 모든 주어가 We. 관행인가봄. &amp;lt;br&amp;gt;&amp;lt;span class=gray&amp;gt;아니 그럼 [https://ko.wikipedia.org/wiki/체스터_윌러드 체스터 윌러드]는 뭐야.&amp;lt;/span&amp;gt;&amp;lt;/ref&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Microsoft Research&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Microsoft Research&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;ref&amp;gt;지금은 페북에 있는듯&amp;lt;/ref&amp;gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;[https://github.com/rbgirshick/fast-rcnn github]&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;[https://github.com/rbgirshick/fast-rcnn github]&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;[https://arxiv.org/abs/1504.08083 arXiv:1504.08083]&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;[https://arxiv.org/abs/1504.08083 arXiv:1504.08083]&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;아래 논문내용중 등장하는 슬라이드 조각조각은 저자가 직접 작성해서 공개&amp;lt;ref&amp;gt;http://www.robots.ox.ac.uk/~tvg/publications/talks/fast-rcnn-slides.pdf&amp;lt;/ref&amp;gt;한 것.&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;그냥 R-CNN&amp;lt;ref name=r9&amp;gt;R. Girshick, J. Donahue, T. Darrell, and J. Malik. Rich feature hierarchies for accurate object detection and semantic segmentation. In CVPR, 2014.&amp;lt;/ref&amp;gt;은 이런가봄 : R-CNN first finetunes a ConvNet on object proposals using log loss. Then, it fits SVMs to ConvNet features. These SVMs act as object detectors, replacing the softmax classifier learnt by fine-tuning. In the third training stage, bounding-box regressors are learned. … Detection with VGG16 takes 47s / image (on a Nvidia K40 GPU overclocked to 875 MHz.). 이야 ~  &lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;그냥 R-CNN&amp;lt;ref name=r9&amp;gt;R. Girshick, J. Donahue, T. Darrell, and J. Malik. Rich feature hierarchies for accurate object detection and semantic segmentation. In CVPR, 2014.&amp;lt;/ref&amp;gt;은 이런가봄 : R-CNN first finetunes a ConvNet on object proposals using log loss. Then, it fits SVMs to ConvNet features. These SVMs act as object detectors, replacing the softmax classifier learnt by fine-tuning. In the third training stage, bounding-box regressors are learned. … Detection with VGG16 takes 47s / image (on a Nvidia K40 GPU overclocked to 875 MHz.). 이야 ~  &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Admin</name></author>
		
	</entry>
	<entry>
		<id>http://samediff.kr/wiki/index.php?title=Fast_RCNN&amp;diff=13753&amp;oldid=prev</id>
		<title>2017년 8월 4일 (금) 09:31에 Admin님의 편집</title>
		<link rel="alternate" type="text/html" href="http://samediff.kr/wiki/index.php?title=Fast_RCNN&amp;diff=13753&amp;oldid=prev"/>
		<updated>2017-08-04T09:31:30Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;ko&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← 이전 판&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;2017년 8월 4일 (금) 09:31 판&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l45&quot; &gt;45번째 줄:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;45번째 줄:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;맨 마지막 softmax를 SVM으로 바꾸어 보았는데 softmax가 조금 더 나았다. stagewise보다 one-shot learning이 더 낫다는 또 하나의 증거.&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;맨 마지막 softmax를 SVM으로 바꾸어 보았는데 softmax가 조금 더 나았다. stagewise보다 one-shot learning이 더 낫다는 또 하나의 증거.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;proposal을 엄청 늘려도 보았는데 별 도움 안되었다. 매우 과다하게 늘리면 오히려 mAP를 떨어트린다. “sparse object proposals are better”  &lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;proposal을 엄청 늘려도 보았는데 별 도움 안되었다. 매우 과다하게 늘리면 오히려 mAP를 떨어트린다. “sparse object proposals are better” &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;ref&amp;gt;논문에서 주로 selective search사용하는 것으로 보였음. low-level feature사용해서 superpixel을 greedy하게 합하는 방법&amp;lt;/ref&amp;gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Admin</name></author>
		
	</entry>
	<entry>
		<id>http://samediff.kr/wiki/index.php?title=Fast_RCNN&amp;diff=13743&amp;oldid=prev</id>
		<title>2017년 8월 4일 (금) 09:04에 Admin님의 편집</title>
		<link rel="alternate" type="text/html" href="http://samediff.kr/wiki/index.php?title=Fast_RCNN&amp;diff=13743&amp;oldid=prev"/>
		<updated>2017-08-04T09:04:39Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;ko&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← 이전 판&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;2017년 8월 4일 (금) 09:04 판&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l1&quot; &gt;1번째 줄:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;1번째 줄:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Ross Girshick  &amp;lt;ref&amp;gt;혼자 썼는데 본문의 모든 주어가 We. 관행인가봄. &amp;lt;br&amp;gt;아니 그럼 [https://ko.wikipedia.org/wiki/체스터_윌러드 체스터 윌러드]는 뭐야.&amp;lt;/ref&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Ross Girshick  &amp;lt;ref&amp;gt;혼자 썼는데 본문의 모든 주어가 We. 관행인가봄. &amp;lt;br&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;gt;&amp;lt;span class=gray&lt;/ins&gt;&amp;gt;아니 그럼 [https://ko.wikipedia.org/wiki/체스터_윌러드 체스터 윌러드]는 뭐야.&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;/span&amp;gt;&lt;/ins&gt;&amp;lt;/ref&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Microsoft Research&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Microsoft Research&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Admin</name></author>
		
	</entry>
	<entry>
		<id>http://samediff.kr/wiki/index.php?title=Fast_RCNN&amp;diff=13742&amp;oldid=prev</id>
		<title>2017년 8월 4일 (금) 09:02에 Admin님의 편집</title>
		<link rel="alternate" type="text/html" href="http://samediff.kr/wiki/index.php?title=Fast_RCNN&amp;diff=13742&amp;oldid=prev"/>
		<updated>2017-08-04T09:02:59Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;ko&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← 이전 판&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;2017년 8월 4일 (금) 09:02 판&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l11&quot; &gt;11번째 줄:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;11번째 줄:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;그냥 R-CNN은 object proposal마다 cnn forward하는데, SPPnets&amp;lt;ref name=r11&amp;gt;K.He, X.Zhang, S.Ren, and J.Sun. Spatial pyramid pooling in deep convolutional networks for visual recognition. In ECCV,2014.&amp;lt;/ref&amp;gt;가 미리 cnn돌려놓고 거기서부터 feature뽑아내는 식으로 test time은 10~100배, training time도 3배정도 개선했다. 단, SPPnets는 R-CNN과 달리 spatial pyramid pooling앞의 convolutional layers를 update할 수 없다.&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;그냥 R-CNN은 object proposal마다 cnn forward하는데, SPPnets&amp;lt;ref name=r11&amp;gt;K.He, X.Zhang, S.Ren, and J.Sun. Spatial pyramid pooling in deep convolutional networks for visual recognition. In ECCV,2014.&amp;lt;/ref&amp;gt;가 미리 cnn돌려놓고 거기서부터 feature뽑아내는 식으로 test time은 10~100배, training time도 3배정도 개선했다. 단, SPPnets는 R-CNN과 달리 spatial pyramid pooling앞의 convolutional layers를 update할 수 없다.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;입력으로는 이미지와 object proposals를 받는다. 먼저 이미지가 convnet지나면서 feature map을 만들고 이 feature map과 앞의 object proposal로부터 RoI pooling layer가 일정한 길이의 feature vector들을 뽑아낸다. 이 feature vector들이 fc를 지나가면서 두가지 출력을 내는데 하나는 클래스정보(K object class + ‘background’의 softmax), 다른 하나는 영역(refined bounding box by &amp;lt;i&amp;gt;category-specific&amp;lt;/i&amp;gt; bounding-box regressors).&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;입력으로는 이미지와 object proposals를 받는다. 먼저 이미지가 convnet지나면서 feature map을 만들고 이 feature map과 앞의 object proposal로부터 RoI pooling layer가 일정한 길이의 feature vector들을 뽑아낸다. 이 feature vector들이 fc를 지나가면서 두가지 출력을 내는데 하나는 클래스정보(K object class + ‘background’의 softmax), 다른 하나는 영역(refined bounding box by &amp;lt;i&amp;gt;category-specific&amp;lt;/i&amp;gt; bounding-box regressors&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;. 각 클래스마다 네개의 좌표&lt;/ins&gt;).  &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;이미지 하나당 RoI하나씩 해서 학습하는 것(SPPnet과 R-CNN은 이렇게 한다)보다 같은 RoI개수라도 적은 이미지 수를 사용하면 학습이 빠르다. 이미지 하나당 cnn한번만 통과하면 RoI마다 feature를 얻어내기 때문이다(cnn결과를 share함). 보통 이미지 전체가 RoI로 잡히는 일이 많기 때문에 이렇게 하면 계산속도 이득이 크다. 이미지당 중복된 RoI를 뽑을 때 서로간 correlation이 문제될 수 있지만, 실제 실험(이미지 당 64개씩 RoI, batch size=2)결과 괜찮았다.&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;이미지 하나당 RoI하나씩 해서 학습하는 것(SPPnet과 R-CNN은 이렇게 한다)보다 같은 RoI개수라도 적은 이미지 수를 사용하면 학습이 빠르다. 이미지 하나당 cnn한번만 통과하면 RoI마다 feature를 얻어내기 때문이다(cnn결과를 share함). 보통 이미지 전체가 RoI로 잡히는 일이 많기 때문에 이렇게 하면 계산속도 이득이 크다. 이미지당 중복된 RoI를 뽑을 때 서로간 correlation이 문제될 수 있지만, 실제 실험(이미지 당 64개씩 RoI, batch size=2)결과 괜찮았다.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Admin</name></author>
		
	</entry>
	<entry>
		<id>http://samediff.kr/wiki/index.php?title=Fast_RCNN&amp;diff=13741&amp;oldid=prev</id>
		<title>2017년 8월 4일 (금) 09:00에 Admin님의 편집</title>
		<link rel="alternate" type="text/html" href="http://samediff.kr/wiki/index.php?title=Fast_RCNN&amp;diff=13741&amp;oldid=prev"/>
		<updated>2017-08-04T09:00:03Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;ko&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← 이전 판&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;2017년 8월 4일 (금) 09:00 판&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l37&quot; &gt;37번째 줄:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;37번째 줄:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Scale invariance위해 두가지 방법을 해봄. 하나는 이미지 사이즈를 일정하게 고정하고 brutal force하게 RoI를 주는법, 하나는 이미지 피라미드에서 취하는 법. 첫번째 방법은 net이 모든 object size에 대해 학습해야 하고(대부분의 실험을 이렇게 함) 두번째 방법은 RoI가 거의 일정하게 유지된다. 실험할때는 \(224^2\) pixel에 최대한 가깝게 했다. 실험결과는 SPPnet과 일치하는데, multi-scale이 아주 약간 더 좋기는 하지만, 둘의 성능이 거의 동일하다. 따라서 다른 모든 실험은 single-scale로 이루어졌다.  &lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Scale invariance위해 두가지 방법을 해봄. 하나는 이미지 사이즈를 일정하게 고정하고 brutal force하게 RoI를 주는법, 하나는 이미지 피라미드에서 취하는 법. 첫번째 방법은 net이 모든 object size에 대해 학습해야 하고(대부분의 실험을 이렇게 함) 두번째 방법은 RoI가 거의 일정하게 유지된다. 실험할때는 \(224^2\) pixel에 최대한 가깝게 했다. 실험결과는 SPPnet과 일치하는데, multi-scale이 아주 약간 더 좋기는 하지만, 둘의 성능이 거의 동일하다. 따라서 다른 모든 실험은 single-scale로 이루어졌다.  &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Detection할 때, 더 빠르게 하기 위해 Truncated SVD&amp;lt;ref name=r5&amp;gt;&amp;lt;/ref&amp;gt;&amp;lt;ref name=r23&amp;gt;&amp;lt;/ref&amp;gt; 쓸 수도 있다. weight vector \(W \approx U\Sigma_t V^T\), U는 \(u \times t\) matrix이고 \(W\)의 first \(t\) left-singular vectors. 이렇게 하면 parameter가 \(uv\)개 에서 \(t(u+v)\)로 줄어들어서 \(t &amp;lt; \min (u, v)\)일 때 효과가 좋다. VOC07대상으로 한 실험에서 FRCN은 VGG16보다 146배 빠른데, truncated SVD까지 하면 213배 빠르다. SPPnet과 비교하면 각 7, 10배. 정확도는 0.3%정도 저하된다.  &lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Detection할 때, 더 빠르게 하기 위해 Truncated SVD&amp;lt;ref name=r5&amp;gt;&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;E. Denton, W. Zaremba, J. Bruna, Y. LeCun, and R. Fergus. Exploiting linear structure within convolutional networks for efficient evaluation. In NIPS, 2014.&lt;/ins&gt;&amp;lt;/ref&amp;gt;&amp;lt;ref name=r23&amp;gt;&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;J. Xue, J. Li, and Y. Gong. Restructuring of deep neural network acoustic models with singular value decomposition. In Interspeech, 2013.&lt;/ins&gt;&amp;lt;/ref&amp;gt; 쓸 수도 있다. weight vector \(W \approx U\Sigma_t V^T\), U는 \(u \times t\) matrix이고 \(W\)의 first \(t\) left-singular vectors. 이렇게 하면 parameter가 \(uv\)개 에서 \(t(u+v)\)로 줄어들어서 \(t &amp;lt; \min (u, v)\)일 때 효과가 좋다. VOC07대상으로 한 실험에서 FRCN은 VGG16보다 146배 빠른데, truncated SVD까지 하면 213배 빠르다. SPPnet과 비교하면 각 7, 10배. 정확도는 0.3%정도 저하된다.  &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;   &lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;   &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;많이 깊지 않은 net에 대해서는 마지막 fc nets만 fine-tuning하면 된다고 알려져 있고&amp;lt;ref name=r11 /&amp;gt;, deep한 net에 대해서는 그렇지 않음을 실험으로 확인했다. 곧, RoI pooling layer를 통한 학습이 중요하다는 뜻이다. 그렇다고 모든 conv layer가 fine-tune되어야 하는가 하면 그것도 아니다. 실험결과 conv-1은 generic해서, fine tune의 효과가 좋지 않았다. task-specific하게 결정하면 된다. 이 paper에서 VGG16은 모두 conv3_1 보다 이후의 layers(12개중 9개)만 학습시켰다. (적은 양을 update하면 GPU메모리문제를 피할 수 있다. )&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;많이 깊지 않은 net에 대해서는 마지막 fc nets만 fine-tuning하면 된다고 알려져 있고&amp;lt;ref name=r11 /&amp;gt;, deep한 net에 대해서는 그렇지 않음을 실험으로 확인했다. 곧, RoI pooling layer를 통한 학습이 중요하다는 뜻이다. 그렇다고 모든 conv layer가 fine-tune되어야 하는가 하면 그것도 아니다. 실험결과 conv-1은 generic해서, fine tune의 효과가 좋지 않았다. task-specific하게 결정하면 된다. 이 paper에서 VGG16은 모두 conv3_1 보다 이후의 layers(12개중 9개)만 학습시켰다. (적은 양을 update하면 GPU메모리문제를 피할 수 있다. )&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l46&quot; &gt;46번째 줄:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;46번째 줄:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;proposal을 엄청 늘려도 보았는데 별 도움 안되었다. 매우 과다하게 늘리면 오히려 mAP를 떨어트린다. “sparse object proposals are better”  &lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;proposal을 엄청 늘려도 보았는데 별 도움 안되었다. 매우 과다하게 늘리면 오히려 mAP를 떨어트린다. “sparse object proposals are better”  &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;   &lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;   &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;=x=&lt;/del&gt;&lt;/div&gt;&lt;/td&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;=x=&lt;/del&gt;&lt;/div&gt;&lt;/td&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;----&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;----&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;references/&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;references/&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;disqus/&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;disqus/&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Admin</name></author>
		
	</entry>
	<entry>
		<id>http://samediff.kr/wiki/index.php?title=Fast_RCNN&amp;diff=13740&amp;oldid=prev</id>
		<title>2017년 8월 4일 (금) 08:58에 Admin님의 편집</title>
		<link rel="alternate" type="text/html" href="http://samediff.kr/wiki/index.php?title=Fast_RCNN&amp;diff=13740&amp;oldid=prev"/>
		<updated>2017-08-04T08:58:41Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;ko&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← 이전 판&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;2017년 8월 4일 (금) 08:58 판&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l37&quot; &gt;37번째 줄:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;37번째 줄:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Scale invariance위해 두가지 방법을 해봄. 하나는 이미지 사이즈를 일정하게 고정하고 brutal force하게 RoI를 주는법, 하나는 이미지 피라미드에서 취하는 법. 첫번째 방법은 net이 모든 object size에 대해 학습해야 하고(대부분의 실험을 이렇게 함) 두번째 방법은 RoI가 거의 일정하게 유지된다. 실험할때는 \(224^2\) pixel에 최대한 가깝게 했다. 실험결과는 SPPnet과 일치하는데, multi-scale이 아주 약간 더 좋기는 하지만, 둘의 성능이 거의 동일하다. 따라서 다른 모든 실험은 single-scale로 이루어졌다.  &lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Scale invariance위해 두가지 방법을 해봄. 하나는 이미지 사이즈를 일정하게 고정하고 brutal force하게 RoI를 주는법, 하나는 이미지 피라미드에서 취하는 법. 첫번째 방법은 net이 모든 object size에 대해 학습해야 하고(대부분의 실험을 이렇게 함) 두번째 방법은 RoI가 거의 일정하게 유지된다. 실험할때는 \(224^2\) pixel에 최대한 가깝게 했다. 실험결과는 SPPnet과 일치하는데, multi-scale이 아주 약간 더 좋기는 하지만, 둘의 성능이 거의 동일하다. 따라서 다른 모든 실험은 single-scale로 이루어졌다.  &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Detection할 때, 더 빠르게 하기 위해 Truncated SVD&amp;lt;ref name=r5&amp;gt;&amp;lt;/ref&amp;gt;&amp;lt;ref name=r23&amp;gt;&amp;lt;/ref&amp;gt; 쓸 수도 있다. weight vector \(W \approx U\Sigma_t V^T\), U는 \(u \times t\) matrix이고 \(W\)의 first \(t\) left-singular vectors. 이렇게 하면 parameter가 \(uv\)개 에서 \(t(u+v)\)로 줄어들어서 \(t &amp;lt; \min (u, v)\)일 때 효과가 좋다. VOC07대상으로 한 실험에서 &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;FRCNN은 &lt;/del&gt;VGG16보다 146배 빠른데, truncated SVD까지 하면 213배 빠르다. SPPnet과 비교하면 각 7, 10배. 정확도는 0.3%정도 저하된다.  &lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Detection할 때, 더 빠르게 하기 위해 Truncated SVD&amp;lt;ref name=r5&amp;gt;&amp;lt;/ref&amp;gt;&amp;lt;ref name=r23&amp;gt;&amp;lt;/ref&amp;gt; 쓸 수도 있다. weight vector \(W \approx U\Sigma_t V^T\), U는 \(u \times t\) matrix이고 \(W\)의 first \(t\) left-singular vectors. 이렇게 하면 parameter가 \(uv\)개 에서 \(t(u+v)\)로 줄어들어서 \(t &amp;lt; \min (u, v)\)일 때 효과가 좋다. VOC07대상으로 한 실험에서 &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;FRCN은 &lt;/ins&gt;VGG16보다 146배 빠른데, truncated SVD까지 하면 213배 빠르다. SPPnet과 비교하면 각 7, 10배. 정확도는 0.3%정도 저하된다.  &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;   &lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;   &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;많이 깊지 않은 net에 대해서는 마지막 fc nets만 fine-tuning하면 된다고 알려져 있고&amp;lt;ref name=r11 /&amp;gt;, deep한 net에 대해서는 그렇지 않음을 실험으로 확인했다. 곧, RoI pooling layer를 통한 학습이 중요하다는 뜻이다. 그렇다고 모든 conv layer가 fine-tune되어야 하는가 하면 그것도 아니다. 실험결과 conv-1은 generic해서, fine tune의 효과가 좋지 않았다. task-specific하게 결정하면 된다. 이 paper에서 VGG16은 모두 conv3_1 보다 이후의 layers(12개중 9개)만 학습시켰다. (적은 양을 update하면 GPU메모리문제를 피할 수 있다. )&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;많이 깊지 않은 net에 대해서는 마지막 fc nets만 fine-tuning하면 된다고 알려져 있고&amp;lt;ref name=r11 /&amp;gt;, deep한 net에 대해서는 그렇지 않음을 실험으로 확인했다. 곧, RoI pooling layer를 통한 학습이 중요하다는 뜻이다. 그렇다고 모든 conv layer가 fine-tune되어야 하는가 하면 그것도 아니다. 실험결과 conv-1은 generic해서, fine tune의 효과가 좋지 않았다. task-specific하게 결정하면 된다. 이 paper에서 VGG16은 모두 conv3_1 보다 이후의 layers(12개중 9개)만 학습시켰다. (적은 양을 update하면 GPU메모리문제를 피할 수 있다. )&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l43&quot; &gt;43번째 줄:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;43번째 줄:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;클래스만 loss로 해서 학습시키는 것에 비해 multi-task training의 결과가 더 좋다.  &lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;클래스만 loss로 해서 학습시키는 것에 비해 multi-task training의 결과가 더 좋다.  &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;맨 마지막 softmax를 SVM으로 바꾸어 보았는데 softmax가 조금 더 나았다. stagewise보다 one-shot learning이 더 낫다는 또 하나의 증거.&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;proposal을 엄청 늘려도 보았는데 별 도움 안되었다. 매우 과다하게 늘리면 오히려 mAP를 떨어트린다. “sparse object proposals are better” &lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;   &lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;   &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;=x=&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;=x=&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Admin</name></author>
		
	</entry>
</feed>