<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href="http://www.blogger.com/styles/atom.css" type="text/css"?><feed xmlns='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/' xmlns:georss='http://www.georss.org/georss' xmlns:gd='http://schemas.google.com/g/2005' xmlns:thr='http://purl.org/syndication/thread/1.0'><id>tag:blogger.com,1999:blog-3250503894549245556</id><updated>2012-02-16T16:42:31.601+01:00</updated><category term='Fedora Commons'/><category term='clustering'/><category term='data mining'/><category term='multimodal'/><category term='Pia'/><category term='news'/><category term='web'/><category term='identification'/><category term='Robert'/><category term='stats gathering'/><category term='GOS'/><category term='Martí'/><category term='HTTP'/><category term='classification'/><category term='Social TV'/><category term='medical'/><category term='test'/><category term='upseek'/><category term='Monica'/><category term='detection'/><category term='Binary Partition Trees'/><category term='Jaume'/><category term='Relevance Feedback'/><category term='video server'/><category term='sports'/><category term='video'/><category term='semantics'/><category term='descriptors'/><category term='Internet TV'/><category term='Carles'/><category term='DVB'/><category term='Marcel'/><category term='mysql'/><category term='java'/><category term='query by text'/><category term='webservices'/><category term='object'/><category term='Lluís'/><category term='Aida'/><category term='Bruna'/><category term='Videolan'/><category term='bash'/><category term='thumbnail'/><category term='vlc'/><category term='Elías'/><category term='rest'/><category term='eva'/><category term='region'/><category term='Eli'/><category term='social networks'/><category term='iPhone'/><category term='annotation'/><category term='text'/><category term='MPEG-2 TS'/><category term='xavi'/><category term='html'/><category term='Eclipse'/><category 
term='coding'/><category term='Brain Computer Interfaces'/><category term='open-source'/><category term='segmentation'/><category term='Web TV'/><category term='thesis'/><category term='Architecture'/><category term='javascript'/><category term='KaHo'/><category term='Manel'/><category term='measures'/><category term='Alex'/><category term='video player'/><category term='query by image'/><category term='conference'/><category term='Christian'/><category term='Augmented Reality'/><category term='HbbTv'/><category term='Electronics'/><category term='evaluation'/><category term='MPEG-2'/><category term='user interface'/><category term='Objective-C'/><category term='IPTV'/><category term='video format'/><category term='image'/><category term='programming languages'/><category term='Laurens'/><category term='supervised learning'/><category term='teaching'/><category term='database'/><category term='anna'/><category term='navigation'/><category term='interactive segmentation'/><category term='video transcoding'/><category term='soap'/><category term='Neus'/><category term='GAT'/><category term='Cloud Computing'/><category term='php'/><category term='ajax'/><category term='iPhone app'/><category term='Google Summer of Code'/><category term='khristina'/><category term='streaming'/><category term='retrieval'/><category term='Server'/><category term='RAMON'/><category term='digital television'/><category term='databases'/><category term='Marc'/><category term='I3MEDIA'/><category term='Set Top Box'/><category term='object retrieval'/><category term='Laura'/><category term='copy detection'/><category term='twitter'/><category term='upload'/><category term='erasmus'/><category term='FLEX'/><category term='Cristina'/><category term='Mireia'/><category term='MPEG-7'/><category term='fusion'/><category term='metadata'/><title type='text'>Bit search</title><subtitle type='html'>Research related to the team directed by Xavier Giró-i-Nieto.</subtitle><link 
rel='http://schemas.google.com/g/2005#feed' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/posts/default'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default?max-results=100'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/'/><link rel='hub' href='http://pubsubhubbub.appspot.com/'/><link rel='next' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default?start-index=101&amp;max-results=100'/><author><name>Xavi Giró-i-Nieto</name><uri>https://profiles.google.com/110271653716466055491</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh5.googleusercontent.com/--EaBZ1Bzzsg/AAAAAAAAAAI/AAAAAAAAAwU/4J6xIhZ3Xyo/s512-c/photo.jpg'/></author><generator version='7.00' uri='http://www.blogger.com'>Blogger</generator><openSearch:totalResults>302</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>100</openSearch:itemsPerPage><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-2559687339314581383</id><published>2012-02-13T12:57:00.000+01:00</published><updated>2012-02-13T13:01:33.911+01:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='eva'/><category scheme='http://www.blogger.com/atom/ns#' term='Brain Computer Interfaces'/><title type='text'>Object detection at a local scale with EEG by Bigdely-Shamlo et al (2008)</title><content type='html'>&lt;br /&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;Following our overview on EEG-based classification of images, we have come up with a new article that explores the localization of objects within an image. 
The article "&lt;a href="http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&amp;amp;arnumber=4595650"&gt;Brain Activity-Based Image Classification From Rapid Serial Visual Presentation&lt;/a&gt;"&lt;/span&gt;&lt;span lang="EN-US"&gt; &lt;/span&gt;presents aclassification system based on a &lt;a href="http://en.wikipedia.org/wiki/Brain%E2%80%93computer_interface"&gt;Brain Computer Interface (BCI)&lt;/a&gt; device that aims at detecting objects of interest in satellite images. The study was performed by &lt;a href="http://sccn.ucsd.edu/~nima/"&gt;Nima Bigdely-Shamlo&lt;/a&gt;, Andrey Vankov, &lt;a href="http://ilabs.washington.edu/research-staff/bio/i-labs-rey-ramirez-phd"&gt;Rey R.Ramirez&lt;/a&gt; and &lt;a href="http://sccn.ucsd.edu/~scott/"&gt;Scott Makeig&lt;/a&gt; of &lt;a href="http://ucsd.edu/"&gt;University of California&lt;/a&gt;, San Diego.&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;The paper considers an automatic classificationsystem of images in real time based on a BCI interface that measuresthe &lt;a href="http://en.wikipedia.org/wiki/Electroencephalography"&gt;EEG&lt;/a&gt; waves from 128 channels (&lt;i&gt;High-densityelectroencephalographic acquired date&lt;/i&gt;). The final application isthe detection of which satellite images from the city of London contain a plane.Users perform a visual inspection of the image at a local scale guided by &lt;a href="http://en.wikipedia.org/wiki/Rapid_serial_visual_presentation"&gt;RSVP&lt;/a&gt;. 
The system acquires the user's EEG signals during the viewing of the clip to study whether it is possible to use these signals to detect the images with planes, a task that is considered too challenging for the computer vision algorithms considered by the authors of the article. Additionally, the experiment also captures the manual classification of images by the users. This way they can compare the automatic classification based on EEG waves with the manual detection of the users.&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;The visual inspection guided by RSVP is made by expanding a small region of the image and reducing the contrast of the rest of the image. This preprocessing of the image guarantees the focus of the user's attention on the highlighted region, so the EEG classifier will not only discern if the whole image contains the plane, but also its location. The generated focus scans the complete image using a path called "Heptunx search". 
The scanning algorithm not only varies the position of the focus but also tests different sizes to consider different scales.&amp;nbsp;&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-eq5H6z5lQGE/TzjVaqoLc8I/AAAAAAAAACY/ltNZzRPdRsY/s1600/focus.jpg" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="297" src="http://3.bp.blogspot.com/-eq5H6z5lQGE/TzjVaqoLc8I/AAAAAAAAACY/ltNZzRPdRsY/s320/focus.jpg" width="320" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Image preprocessing to highlight a focus region.&lt;/td&gt;&lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;Focused images are presented by RSVP in 12Hzand in blocks of 4.1 seconds (&lt;i&gt;Bursts&lt;/i&gt;) that alternate relevant and irrelevantimage clips. These burst series are presented to 8 different users. Every user, aftereach block, indicates whether or not the relevant image has been seen. During trainingsessions, feedback is given to users to inform them if their answers has beencorrect or not. 
This way, the users also tune their attention and improve their manual detection skills.&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span lang="EN-US"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-ZkCiIsK6MEw/TzjVcosNM5I/AAAAAAAAACg/fxYH5TjRYQI/s1600/burst.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="172" src="http://1.bp.blogspot.com/-ZkCiIsK6MEw/TzjVcosNM5I/AAAAAAAAACg/fxYH5TjRYQI/s320/burst.jpg" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span lang="EN-US"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;br /&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;The authors recommend not showing any relevant image in the first or the last 500ms of each block toprevent the effects that may occur in the EEG waves due to sudden changes inthe limits of image blocks.&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span lang="EN-US"&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;Most of the paper describes the processing of the acquired data, which corresponds to 128 channels of EEG waves in the 8ms after displaying each image. As a result,each image is described by a characteristics matrix of 128 columns. 
Then,these matrices are reduced by different methods (&lt;a href="http://en.wikipedia.org/wiki/Independent_component_analysis"&gt;ICA&lt;/a&gt;&amp;nbsp;and &lt;a href="http://en.wikipedia.org/wiki/Principal_component_analysis"&gt;PCA&lt;/a&gt;) to matrices of 50independent columns that characterize each image. Finally we get two matricesof features, one in the time domain and the other in the frequency domain fromwhich the classification is made.&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span lang="EN-US"&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;The results presented are based onthe study of the &lt;a href="http://en.wikipedia.org/wiki/Receiver_operating_characteristic"&gt;Receiver Operating Characteristic (ROC)&lt;/a&gt; curves. These curves determine the amount oftrue positives &lt;i&gt;tp&lt;/i&gt; (very relevantimages identified) in terms of false positive &lt;i&gt;fp &lt;/i&gt;(irrelevant images identified as relevant) when consideringdifferent decision thresholds by the classifier. 
To determine the quality of the classifier, the area under the ROC is calculated (tp and fp normalized to 1); the closer this area is to one, the better the result.&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-a_GpKJB67uk/TzjVdbnsBtI/AAAAAAAAACo/H4QZSnO8ixU/s1600/ROC.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="246" src="http://3.bp.blogspot.com/-a_GpKJB67uk/TzjVdbnsBtI/AAAAAAAAACo/H4QZSnO8ixU/s320/ROC.jpg" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span lang="EN-US"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span lang="EN-US"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span lang="EN-US"&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span lang="EN-US"&gt;The authors report that results improve when combining the classifiers in the time domain and the frequency domain. This fusion is performed by assuming independence between the two classifiers. 
In this case, the posterior probabilities obtained can be combined with a simple product (&lt;a href="http://en.wikipedia.org/wiki/Naive_Bayes_classifier"&gt;Naive Bayes Classifier&lt;/a&gt;).&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span lang="EN-US"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-NdqPnDINCH4/TzjVdyFLgMI/AAAAAAAAACs/LEnMA1_Xblg/s1600/taula2.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="211" src="http://2.bp.blogspot.com/-NdqPnDINCH4/TzjVdyFLgMI/AAAAAAAAACs/LEnMA1_Xblg/s640/taula2.jpg" width="640" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span lang="EN-US"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span lang="EN-US"&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;The table shows the values of thearea under the ROC considering the different blocks of images displayed. It isinteresting to note that, although the response of manual annotations could be incorrect (&lt;i&gt;Behaviorally Missed&lt;/i&gt; in the table),the values of these areas are still quite high for six of the eight participants. Thismeans that although users are not aware of having seen the object of interest,the brain continues responding to visual stimulus. The table also reflect the cases where users express that the object has appeared when it really was notpresent on the burst. This error is the cause of the the discrepancy between columns 3&amp;amp;4and 5&amp;amp;6. 
This type of mistake is very rare since in these cases the area underthe ROC is almost equal.&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;&lt;o:p&gt;&lt;br /&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;In conclusion,the system described is useful for a first search for anobject in a large portion of an image, a task where state of the art computer vision algorithms areinefficient. However, in our opinion, this paper fails into comparing the obtained results with the ones that could be obtained with a state of the art algorithm. It would be desirable to estimate the gain obtained with this EEG-based classifier, that is limited for the need of a human and a maximum frequency in the RSVP, with a fully automatic classification system based, for example, on the &lt;a href="http://en.wikipedia.org/wiki/Viola%E2%80%93Jones_object_detection_framework"&gt;Viola and Jones&lt;/a&gt; object detector.&lt;/span&gt;&lt;o:p&gt;&lt;/o:p&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-2559687339314581383?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/2559687339314581383/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2012/02/object-detection-at-local-scale-with.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/2559687339314581383'/><link rel='self' type='application/atom+xml' 
href='http://www.blogger.com/feeds/3250503894549245556/posts/default/2559687339314581383'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2012/02/object-detection-at-local-scale-with.html' title='Object detection at a local scale with EEG by Bigdely-Shamlo et al (2008)'/><author><name>Eva Mohedano</name><uri>https://profiles.google.com/103025214252652811285</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh6.googleusercontent.com/-5w20gRlcEqw/AAAAAAAAAAI/AAAAAAAAACQ/iv7Hkh6WAjg/s512-c/photo.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/-eq5H6z5lQGE/TzjVaqoLc8I/AAAAAAAAACY/ltNZzRPdRsY/s72-c/focus.jpg' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-6464037503020795020</id><published>2012-02-02T17:02:00.000+01:00</published><updated>2012-02-02T17:04:22.235+01:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='eva'/><category scheme='http://www.blogger.com/atom/ns#' term='Brain Computer Interfaces'/><title type='text'>EEG-Augmented Image Search by Healy and Smeaton (2011)</title><content type='html'>&lt;br /&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span style="font-family: inherit;"&gt;&lt;span class="apple-converted-space"&gt;&lt;span lang="EN-US" style="background-color: white; line-height: 115%;"&gt;We have started to develop a new project to explorethe potential of &lt;a href="http://bitsearch.blogspot.com/2012/01/p300-wave-for-brain-computer-interfaces.html"&gt;Brain Computer Interfaces (BCI)&lt;/a&gt; for the search and tagging of images.The first step in the project development is to make a study of the state ofthe different researches based on the exploration of these interfaces. 
Thefirst working paper analyzed has been "&lt;a href="http://doras.dcu.ie/16387/"&gt;Optimising the number of channels in EEG-Augmented Search&lt;/a&gt;" by Graham Healy and Alan Smeaton from the &lt;a href="http://www.dcu.ie/"&gt;Dublin City University&lt;/a&gt; in Ireland.&lt;/span&gt;&lt;/span&gt;&lt;span lang="EN-US"&gt;&amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span style="font-family: inherit;"&gt;&lt;span lang="EN-US"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;The paper describes an experiment based on thedetection of certain target images in a large set of images. For itsrealization, used 4800 images of which 60 contained an object of interest underdifferent views. These images have been extracted from the &lt;a href="http://staff.science.uva.nl/%7Ealoi/"&gt;Amsterdam Library of Object Images (ALOI)&lt;/a&gt;.&amp;nbsp;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-Du6_Yi4HHY8/TyqyLGDmJRI/AAAAAAAAAxU/g4ofoce_njk/s1600/Screen+Shot+2012-02-02+at+4.51.18+PM.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="123" src="http://4.bp.blogspot.com/-Du6_Yi4HHY8/TyqyLGDmJRI/AAAAAAAAAxU/g4ofoce_njk/s320/Screen+Shot+2012-02-02+at+4.51.18+PM.png" width="320" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Objects from the ALOI dataset&lt;/td&gt;&lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;&lt;br /&gt;&lt;span lang="EN-US"&gt;The relevant images are randomly distributed throughout theset. 
The sequence of images, known as &lt;a href="http://en.wikipedia.org/wiki/Rapid_serial_visual_presentation"&gt;Rapid Serial Visual Presentation (RSVP)&lt;/a&gt;&lt;/span&gt;&lt;span lang="EN-US"&gt;, is shown to 8 different people ata frequency of 10Hz. These persons are divided into 4 groups, where the 2people from each group will seek images with a different object of interestfrom the other groups.&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span lang="EN-US"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-xj9rgJ6GvUQ/TyqUbiAeL1I/AAAAAAAAABQ/NBNZXwN53jA/s1600/RSVP_1.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="198" src="http://1.bp.blogspot.com/-xj9rgJ6GvUQ/TyqUbiAeL1I/AAAAAAAAABQ/NBNZXwN53jA/s320/RSVP_1.jpg" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span lang="EN-US"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span lang="EN-US"&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;The paper discusses the image-search processes based on BCI interfaces combined with a push-button. In the article, they improve theresults obtained when considering only the pulses on the push-button by adding brain waves captured from EEG. 
They call this concept ‘&lt;i&gt;EEG-Augmented Image Search&lt;/i&gt;’.&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;For the detection of the target images, they analyze two types of signals:&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;- &lt;u&gt;Signal Button&lt;/u&gt;, which is acquired by pressing a button when the user sees a target image.&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;- &lt;u&gt;EEG signals&lt;/u&gt; of the brain activity of individuals measured with the device KT88-1016, which consists of 16 sensors placed on the scalp. 
16 different channels are processed.&lt;br /&gt;&lt;br /&gt;&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-yGtOnbJPYKo/TyqyPZY6AhI/AAAAAAAAAxk/dzslESafBMA/s1600/p300-wave.jpg" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="240" src="http://1.bp.blogspot.com/-yGtOnbJPYKo/TyqyPZY6AhI/AAAAAAAAAxk/dzslESafBMA/s320/p300-wave.jpg" width="320" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;P300 wave for a target event&lt;/td&gt;&lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;These signals are acquired during one second after thepresentation of each image and sampled at 100Hz. Therefore, each image has anassociated feature vector of 100 samples. The 16 EEGfeature vectors are combined to create a single vector of 1600 samples. This fusionstrategy is called ‘&lt;a href="http://dx.doi.org/10.1145/1101149.1101236"&gt;&lt;i&gt;Early Fusion&lt;/i&gt;’&lt;/a&gt;.&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span lang="EN-US"&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;The system analyses the two feature vectors associated to every image (the EEG and the button) to determine if it is relevant to the search. In the case of the button,the pulse is clearly generated when the user presses the button. 
For EEG, the study is based on the existence of &lt;a href="http://bitsearch.blogspot.com/2012/01/p300-wave-for-brain-computer-interfaces.html"&gt;P300&lt;/a&gt;&amp;nbsp;waves, which produce a maximum around 300ms after the event. The irrelevant images will be free from pulse (button) or wave (P300).&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;Because of the variability in the response time of different users, the proposed design trains a classifier which adapts to signals generated by each person. A &lt;a href="http://en.wikipedia.org/wiki/Support_vector_machine"&gt;Support Vector Machine (SVM)&lt;/a&gt; classifier is trained for each type of feature vector (EEG and button) with relevance labels associated to each image presented. This same label is used to train a third SVM classifier that combines the outputs of the two classifiers, concatenating the probability values obtained in each case. This strategy is called &lt;a href="http://dx.doi.org/10.1145/1101149.1101236"&gt;‘&lt;i&gt;Late Fusion&lt;/i&gt;’&lt;/a&gt;.&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span lang="EN-US"&gt;The classifier is trained with a subset of training images, 50 target images and 50 non-target images (50/50). The test is performed with another subset of data with a 10/790 distribution. The probability values obtained are sorted from the best to worst, and the precision for the top ten list items is computed (P@10), which corresponds to the number of relevant images of the test subset. 
This process is repeated tentimes following a &lt;a href="http://bitsearch.blogspot.com/2011/04/dataset-generation.html"&gt;cross-validation&lt;/a&gt;&amp;nbsp;scheme with random sampling.&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-rKPOMJLK2lU/TyqXvBuvcQI/AAAAAAAAABY/t4SMThjjyqc/s1600/taula.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="159" src="http://1.bp.blogspot.com/-rKPOMJLK2lU/TyqXvBuvcQI/AAAAAAAAABY/t4SMThjjyqc/s320/taula.jpg" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span lang="EN-US"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal"&gt;&lt;span lang="EN-US"&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;In the table above we can note that thecombination of EEG waves with the signal of the button represents an increase inthe quality of the results for each user. 
Additionally, we can see the resultsof the second study described in the paper (not discussed in this summary)which optimizes the number of EEG channels used for each user.&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="EN-US"&gt;In conclusion, the article shows that the EEGsignals can be used to increase the success in the process of image search ifthey are combined with the analysis of behavior responses in the context ofa limited amount of channels.&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-6464037503020795020?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/6464037503020795020/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2012/02/eeg-augmented-image-search-from-healy.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/6464037503020795020'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/6464037503020795020'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2012/02/eeg-augmented-image-search-from-healy.html' title='EEG-Augmented Image Search by Healy and Smeaton (2011)'/><author><name>Eva Mohedano</name><uri>https://profiles.google.com/103025214252652811285</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' 
src='//lh6.googleusercontent.com/-5w20gRlcEqw/AAAAAAAAAAI/AAAAAAAAACQ/iv7Hkh6WAjg/s512-c/photo.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/-Du6_Yi4HHY8/TyqyLGDmJRI/AAAAAAAAAxU/g4ofoce_njk/s72-c/Screen+Shot+2012-02-02+at+4.51.18+PM.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-8298331318969433403</id><published>2012-01-31T11:54:00.000+01:00</published><updated>2012-02-02T10:07:33.328+01:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='video'/><category scheme='http://www.blogger.com/atom/ns#' term='conference'/><category scheme='http://www.blogger.com/atom/ns#' term='Carles'/><category scheme='http://www.blogger.com/atom/ns#' term='MPEG-7'/><category scheme='http://www.blogger.com/atom/ns#' term='query by image'/><category scheme='http://www.blogger.com/atom/ns#' term='user interface'/><category scheme='http://www.blogger.com/atom/ns#' term='descriptors'/><category scheme='http://www.blogger.com/atom/ns#' term='navigation'/><category scheme='http://www.blogger.com/atom/ns#' term='GOS'/><title type='text'>Intra-video clip retrieval with visual similarity and hierarchical indexing</title><content type='html'>Our participation in the &lt;a href="http://mmm2012.org/vbshowdown/"&gt;Video Browser Showdown&lt;/a&gt; at the &lt;a href="http://mmm2012.org/"&gt;Multimedia Modeling Conference 2012&lt;/a&gt; required the extend of our visual search interface for still images, &lt;a href="http://upseek.upc.edu/gos/"&gt;GOS&lt;/a&gt;, to be able to work with videos. 
After giving some details about my experience during the competition in the &lt;a href="http://bitsearch.blogspot.com/2012/01/video-browser-showdown-at-multimedia.html"&gt;previous entry&lt;/a&gt;, now I am going to focus on the user interface.&lt;br /&gt;&lt;br /&gt;First of all, the video is processed with a keyframe extractor, which is based on the idea of stripe images. A stripe image is created by taking the centre columns of pixels from every frame and attaching them to each other (see Figure below). This makes it possible to identify the different shots of the video (see &lt;a href="http://www.semedia.org/PubFolder/SEMEDIA_D5.4.pdf"&gt;SEMEDIA project&lt;/a&gt; for more information).&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-oHN2CXpO9eY/TypSJ9NVCUI/AAAAAAAAA7g/ekO1TZlJkAc/s1600/stripe-image.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="160" src="http://4.bp.blogspot.com/-oHN2CXpO9eY/TypSJ9NVCUI/AAAAAAAAA7g/ekO1TZlJkAc/s640/stripe-image.png" width="640" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;Then, a set of MPEG-7 visual descriptors are computed for each keyframe. Finally, we compute a visual index based on the &lt;a href="http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=4032612"&gt;Hierarchical Cellular Tree (HCT)&lt;/a&gt; in order to rapidly find similar images at retrieval time and have the database organized as a hierarchy.&lt;br /&gt;&lt;br /&gt;Once the video keyframes are indexed, they can be loaded to GOS by selecting the &lt;i&gt;Open Video&lt;/i&gt; option belonging to the &lt;i&gt;File&lt;/i&gt; menu. Then, GOS will open a new tab for each computed descriptor. Each of these tabs displays the images belonging to the root node of the HCT. 
There is also an additional tab which shows all the keyframes extracted by the shot detector sorted by time.&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-Ewhn2KLmpgM/TyefJ3QbwOI/AAAAAAAAA7A/uZxKT_6Hhdw/s1600/open-video.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="169" src="http://4.bp.blogspot.com/-Ewhn2KLmpgM/TyefJ3QbwOI/AAAAAAAAA7A/uZxKT_6Hhdw/s640/open-video.png" width="640" /&gt;&lt;/a&gt;&lt;/div&gt;Since the output of the HCT indexing algorithm is a hierarchical clustering of the keyframes in the video, each tab can be understood as a summary of the video according to each visual descriptor. Furthermore, we can take advantage of the tree structure and go down the tree by double-clicking on an element to visualize which elements it represents.&lt;br /&gt;&lt;br /&gt;The challenge proposed in the &lt;a href="http://mmm2012.org/vbshowdown/"&gt;Video Browser Showdown&lt;/a&gt; was to localize the target clip within the video. The presented framework provides GOS with two alternatives to fulfill this task, both of them accessible by right-clicking on the thumbnail of one of the shown keyframes:&lt;br /&gt;&lt;br /&gt;&lt;ul&gt;&lt;li&gt;&lt;b&gt;Find similar images&lt;/b&gt;. This option allows the user to search for images similar to the given one. In fact, there are two different options: Find similar images (default) and Find similar images with... The former finds similar images according to a visual descriptor set as default (e.g. Color Layout). The latter allows the user to change the descriptor on which the search must be based. 
In any case, a new tab with the results of the search is opened.&lt;/li&gt;&lt;/ul&gt;&lt;a href="http://3.bp.blogspot.com/-RfqHHXTzyRc/Tyeon5wTJpI/AAAAAAAAA7I/JGc42Ug4_j8/s1600/find-similar-images.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="232" src="http://3.bp.blogspot.com/-RfqHHXTzyRc/Tyeon5wTJpI/AAAAAAAAA7I/JGc42Ug4_j8/s640/find-similar-images.png" width="640" /&gt;&lt;/a&gt; &lt;br /&gt;&lt;ul&gt;&lt;li&gt;&lt;b&gt;Get temporal segment&lt;/b&gt;. This option allows the user to see the temporal context of the keyframe. When it is selected, the previous and next frames are shown in a new tab. These frames have been extracted at a rate of 1 frame per second. This option is used to see whether the retrieved keyframe belongs to the target clip or not. Since the elements shown in this tab are not the keyframes extracted by the shot detector, they cannot be used to perform new visual searches because these elements have not been indexed.&lt;/li&gt;&lt;/ul&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-Sns0PUdkdC4/Tyes1LpaAkI/AAAAAAAAA7Q/8fuzR6VhJiI/s1600/get-temporal-segment.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="232" src="http://4.bp.blogspot.com/-Sns0PUdkdC4/Tyes1LpaAkI/AAAAAAAAA7Q/8fuzR6VhJiI/s640/get-temporal-segment.png" width="640" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;The strategy to retrieve the clip within the video can be summarized as follows:&lt;br /&gt;&lt;ol&gt;&lt;li&gt;See the summary tabs. If you find any keyframe which is likely to be part of the clip, select the &lt;i&gt;Get temporal segment&lt;/i&gt; option to visualize it in the timeline.&amp;nbsp;&lt;/li&gt;&lt;li&gt;If no keyframe belonging to the target clip is shown in the tab, you should try to identify a keyframe which is similar to any of the frames in the target video clip and launch a search by visual similarity. 
If you cannot find any keyframe which can be useful to perform a visual search, you can obtain new elements by descending through the index by double-clicking on a thumbnail.&lt;/li&gt;&lt;li&gt;After performing a visual search, try to find any keyframe which can belong to the target video. In such a case, select the &lt;i&gt;Get temporal segment&lt;/i&gt; option. Otherwise, you can try performing a new visual search by using an element obtained by the previous search.&lt;/li&gt;&lt;li&gt;If you do not succeed in retrieving the target clip, try the other summary tabs which have not been used yet.&lt;/li&gt;&lt;li&gt;In case you fail to retrieve the clip by using the different summary tabs, you should use the temporal tab, which shows all the keyframes extracted by the shot detector sorted by time.&amp;nbsp;&lt;/li&gt;&lt;/ol&gt;These steps are illustrated in the following workflow:&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-9PjQ4v1RceY/Tye59T5vQ1I/AAAAAAAAA7Y/1Fz6Xpvjuo0/s1600/workflow.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="400" src="http://1.bp.blogspot.com/-9PjQ4v1RceY/Tye59T5vQ1I/AAAAAAAAA7Y/1Fz6Xpvjuo0/s400/workflow.png" width="316" /&gt;&lt;/a&gt;&lt;/div&gt;In case you may be a bit overwhelmed with all this information, we have also produced a video tutorial that visually describes the presented workflow.&lt;br /&gt;&lt;br /&gt;&lt;iframe allowfullscreen="" frameborder="0" height="360" src="http://www.youtube.com/embed/5-n4IvmIQfM" width="640"&gt;&lt;/iframe&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-8298331318969433403?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/8298331318969433403/comments/default' 
title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2012/01/using-gos-as-video-browser.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/8298331318969433403'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/8298331318969433403'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2012/01/using-gos-as-video-browser.html' title='Intra-video clip retrieval with visual similarity and hierarchical indexing'/><author><name>Carles Ventura</name><uri>http://www.blogger.com/profile/13142197003570439057</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/-oHN2CXpO9eY/TypSJ9NVCUI/AAAAAAAAA7g/ekO1TZlJkAc/s72-c/stripe-image.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-2639011670338878573</id><published>2012-01-27T19:10:00.001+01:00</published><updated>2012-01-27T19:18:32.527+01:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='eva'/><category scheme='http://www.blogger.com/atom/ns#' term='Brain Computer Interfaces'/><title type='text'>The P300 wave for Brain Computer Interfaces</title><content type='html'>&lt;br /&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span style="font-family: inherit;"&gt;The &lt;a href="http://en.wikipedia.org/wiki/Brain%E2%80%93computer_interface"&gt;Brain Computer Interfaces&lt;/a&gt; (BCI) technologies are based on theacquisition of user’s brainwaves to be processed and interpreted by a computer.Although most applications of this technologies are in the medical field, alsoin the image 
processing field scientists are developing a lot of studies about the applicability of such devices.&lt;o:p&gt;&lt;/o:p&gt; It is remarkable that &lt;a href="http://cacm.acm.org/magazines/2011/5"&gt;&lt;i&gt;Communications of the ACM &lt;/i&gt;in May 2011&lt;/a&gt; dedicated its cover and main article to BCI device technologies.&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span lang="CA" style="font-family: inherit;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span lang="CA" style="font-family: Arial, Helvetica, sans-serif;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span lang="CA"&gt;&lt;/span&gt;&lt;br /&gt;&lt;div style="background-color: rgba(255, 255, 255, 0.917969); text-align: justify;"&gt;&lt;span lang="CA"&gt;&lt;span style="font-family: inherit;"&gt;BCI devices are based on the measurement of&amp;nbsp;&lt;a href="http://en.wikipedia.org/wiki/Eeg" target="_blank"&gt;EEG signals&lt;/a&gt;. These are obtained by measuring&amp;nbsp;&lt;span lang="EN-US"&gt;the electric potential difference produced by brain activity. The acquisition devices are becoming popular thanks to their relatively low price, availability and safety for the users (non-invasive technique). 
Several experiments in the literature have reported their applications in different fields.&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="background-color: rgba(255, 255, 255, 0.917969); font-family: Arial, Helvetica, sans-serif; font-size: 13px; text-align: justify;"&gt;&lt;span lang="CA"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span style="font-family: Arial, Helvetica, sans-serif;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-ofmQmJLNqqc/TyKgz5RjNiI/AAAAAAAAABA/-BW9aW1KwZU/s1600/bci.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;span style="color: black; font-family: Arial, Helvetica, sans-serif;"&gt;&lt;img border="0" height="259" src="http://1.bp.blogspot.com/-ofmQmJLNqqc/TyKgz5RjNiI/AAAAAAAAABA/-BW9aW1KwZU/s320/bci.jpg" width="320" /&gt;&lt;/span&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: justify;"&gt;&lt;span style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: justify;"&gt;&lt;span style="font-family: inherit;"&gt;&lt;span style="background-color: rgba(255, 255, 255, 0.917969);"&gt;One of the most promising analysis that can be performed on EEG is the detection of an&amp;nbsp;&lt;/span&gt;&lt;a href="http://en.wikipedia.org/wiki/Event-related_potential" style="background-color: rgba(255, 255, 255, 0.917969);" target="_blank"&gt;Event Related Potential (ERP)&lt;/a&gt;&lt;span style="background-color: rgba(255, 255, 255, 0.917969);"&gt;, which can be produced by two different types of sources: a perception or a thought. The first case would correspond to an image, a temperature change, a sudden pain..., while the second case implies an internal cognitive stimulus, such as a state of attention or meditation. 
One of the most popular ERPs is the&amp;nbsp;&lt;/span&gt;&lt;a href="http://en.wikipedia.org/wiki/P300_%28neuroscience%29" style="background-color: rgba(255, 255, 255, 0.917969);" target="_blank"&gt;P300 wave&lt;/a&gt;&lt;span style="background-color: rgba(255, 255, 255, 0.917969);"&gt;, which is measured approximately between 300-600 ms after the stimulus occurs.&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="margin-left: 35.4pt;"&gt;&lt;br /&gt;&lt;div style="background-color: rgba(255, 255, 255, 0.917969); font-size: 13px;"&gt;&lt;span style="font-family: Arial, Helvetica, sans-serif;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="margin-left: 35.4pt;"&gt;&lt;span style="background-color: white; font-family: Arial, Helvetica, sans-serif;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-O9lypSkGMyk/TyKh0x6eznI/AAAAAAAAABI/EjqVc_QIG3w/s1600/p300.gif" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;span style="color: black; font-family: Arial, Helvetica, sans-serif;"&gt;&lt;img border="0" height="298" src="http://4.bp.blogspot.com/-O9lypSkGMyk/TyKh0x6eznI/AAAAAAAAABI/EjqVc_QIG3w/s320/p300.gif" width="320" /&gt;&lt;/span&gt;&lt;/a&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: justify;"&gt;&lt;span style="font-family: inherit;"&gt;&lt;span style="background-color: rgba(255, 255, 255, 0.917969);"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: justify;"&gt;&lt;span style="font-family: inherit;"&gt;&lt;span style="background-color: rgba(255, 255, 255, 0.917969);"&gt;Scientists that want to observe the P300 wave prepare experiments on users based on the&amp;nbsp;&lt;/span&gt;&lt;a href="http://en.wikipedia.org/wiki/Oddball_paradigm" style="background-color: rgba(255, 255, 255, 0.917969);" 
target="_blank"&gt;oddball paradigm&lt;/a&gt;&lt;span style="background-color: rgba(255, 255, 255, 0.917969);"&gt;. The basic strategy is to&amp;nbsp;&lt;/span&gt;&lt;span lang="CA" style="background-color: rgba(255, 255, 255, 0.917969);"&gt;present a target stimulus or bull’s eye among other background stimulus.&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="margin-left: 35.25pt;"&gt;&lt;br /&gt;&lt;div style="background-color: rgba(255, 255, 255, 0.917969);"&gt;&lt;span style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="margin-left: 35.25pt;"&gt;&lt;span style="background-color: white; font-family: Arial, Helvetica, sans-serif;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;span style="font-family: Arial, Helvetica, sans-serif;"&gt;&lt;object class="BLOGGER-youtube-video" classid="clsid:D27CDB6E-AE6D-11cf-96B8-444553540000" codebase="http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0" data-thumbnail-src="http://0.gvt0.com/vi/SyB8xBuROAg/0.jpg" height="266" width="320"&gt;&lt;param name="movie" value="http://www.youtube.com/v/SyB8xBuROAg&amp;fs=1&amp;source=uds" /&gt;&lt;param name="bgcolor" value="#FFFFFF" /&gt;&lt;embed width="320" height="266"  src="http://www.youtube.com/v/SyB8xBuROAg&amp;fs=1&amp;source=uds" type="application/x-shockwave-flash"&gt;&lt;/embed&gt;&lt;/object&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="margin-left: 35.25pt;"&gt;&lt;span style="background-color: white; font-family: Arial, Helvetica, sans-serif;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span style="font-family: Arial, Helvetica, sans-serif;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span style="background-color: rgba(255, 255, 255, 0.917969);"&gt;&lt;span style="font-family: inherit;"&gt;In our 
work, we would like to explore the possibilities of measuring the P300 wave using images as stimulus. In this case, the target stimulus would correspond to an image which would be relevant for the user according to a certain task, while the background stimulus would be generated by non-relevant images.&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-2639011670338878573?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/2639011670338878573/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2012/01/p300-wave-for-brain-computer-interfaces.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/2639011670338878573'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/2639011670338878573'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2012/01/p300-wave-for-brain-computer-interfaces.html' title='The P300 wave for Brain Computer Interfaces'/><author><name>Eva Mohedano</name><uri>https://profiles.google.com/103025214252652811285</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh6.googleusercontent.com/-5w20gRlcEqw/AAAAAAAAAAI/AAAAAAAAACQ/iv7Hkh6WAjg/s512-c/photo.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://1.bp.blogspot.com/-ofmQmJLNqqc/TyKgz5RjNiI/AAAAAAAAABA/-BW9aW1KwZU/s72-c/bci.jpg' height='72' 
width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-2465489807541549867</id><published>2012-01-10T10:42:00.001+01:00</published><updated>2012-02-02T12:14:03.667+01:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='video'/><category scheme='http://www.blogger.com/atom/ns#' term='conference'/><category scheme='http://www.blogger.com/atom/ns#' term='Carles'/><category scheme='http://www.blogger.com/atom/ns#' term='retrieval'/><category scheme='http://www.blogger.com/atom/ns#' term='user interface'/><category scheme='http://www.blogger.com/atom/ns#' term='GOS'/><title type='text'>Winners of the Novice Run in the Video Browser Showdown #mmm2012</title><content type='html'>&lt;div lang="en-GB" style="margin-bottom: 0cm;"&gt;My attendance to the &lt;a href="http://mmm2012.org/vbshowdown/"&gt;Video Browser Showdown&lt;/a&gt; at the &lt;a href="http://mmm2012.org/"&gt;MultiMedia Modeling 2012&lt;/a&gt; conference has beenone of the greatest and exciting of my life. It took place in&lt;a href="http://www.klagenfurt.at/klagenfurt-am-woerthersee/index.asp"&gt;Klagenfurt am Wörthersee&lt;/a&gt;, which is placed in Austria.&lt;span lang="en-GB"&gt; I went to this conference as a representative of the UPC team which also consists of Manel Martos, Xavi Giró-i-Nieto, Verónica Vilaplana and Ferran Marqués, to whom I am very grateful. Furthermore, nothing would be possible without the technical support given by Albert Gil and Josep Pujal. Some details about our work can be found &lt;a href="http://www.springerlink.com/content/g225j8144x104683/"&gt;here&lt;/a&gt;. Moreover, I would like to thank many volunteers from the Image and Video Processing Group which tested the user interface, i.e. the &lt;a href="http://upseek.upc.edu/gos/"&gt;GOS (Graphic Object Searcher)&lt;/a&gt;, and gave us some advice to improve it. They allowed us to check that the interface was really user-friendly. 
The development of this user interface started in 2008 thanks to the work done by Silvia Cortés (click &lt;a href="http://upcommons.upc.edu/pfc/handle/2099.1/8588"&gt;here&lt;/a&gt; to see her degree's final project). A picture from the testing day is shown below.&lt;/span&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-7ZCoeaFBZUQ/TxQLE6gcm2I/AAAAAAAAA6w/2dUN1x8-L1M/s1600/IMG_3513.JPG" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="297" src="http://2.bp.blogspot.com/-7ZCoeaFBZUQ/TxQLE6gcm2I/AAAAAAAAA6w/2dUN1x8-L1M/s400/IMG_3513.JPG" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;span lang="en-GB"&gt;The Video Browser Showdown is a competition in which many teams compete in order to retrieve a short piece of video (around 15-20 seconds), which is only seen once on a shared screen, in a longer video, which lasts between 1 and 2 hours. There were 30 different videos, which had been pre-processed. &lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0cm; text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0cm; text-align: justify;"&gt;&lt;a href="http://2.bp.blogspot.com/-M7hQ7AGOLqs/TwvsZYis6CI/AAAAAAAAAj0/GrJl5hA8ipY/s1600/MMM_117.jpg" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="240" src="http://2.bp.blogspot.com/-M7hQ7AGOLqs/TwvsZYis6CI/AAAAAAAAAj0/GrJl5hA8ipY/s400/MMM_117.jpg" width="400" /&gt;&lt;/a&gt;&lt;span lang="en-GB"&gt;The competition started at 13:30 on Friday 6th January, but the participants were called to be there 30 minutes before to prepare all the equipment and check that everything was ok. However, we had already been called two days before to check that there were no problems with the communications between our systems and the server. My surprise was that we did not only check the systems, but we also did a test run competition.  
The organizers of the event were as enthusiastic as all participants. Everything was working perfectly and our first surprise was that we headed the ranking at the run test competition.&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;span lang="en-GB"&gt;Coming back to the actual competition on Friday and having checked that everything was right, we were asked by the chairman to give him a brief introduction to our video retrieval systems. We were 11 teams, 3 of which were student teams from Klagenfurt University but they were not taking part in the competition officially.&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0cm; text-align: justify;"&gt;&lt;a href="http://1.bp.blogspot.com/-u6IlN5P6EsQ/TwvtI65pj3I/AAAAAAAAAj8/MG58DmraWww/s1600/DSC06074.JPG" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="300" src="http://1.bp.blogspot.com/-u6IlN5P6EsQ/TwvtI65pj3I/AAAAAAAAAj8/MG58DmraWww/s400/DSC06074.JPG" width="400" /&gt;&lt;/a&gt;&lt;span lang="en-GB"&gt;&amp;nbsp;&lt;/span&gt;&lt;br /&gt;&lt;span lang="en-GB"&gt;The competition consisted of two rounds: one first round, called expert-run, in which the retrieval systems were used by their developers, i.e. the participants, and a second round, called novice-run, in which an external user had to use the program. Therefore, the chairman asked for 11 volunteers and they were assigned randomly to each one of the teams. Luckily, we were assigned a very nice and smart girl named &lt;a href="http://le2i.cnrs.fr/-Camille-Simon-?lang=fr"&gt;Camille&lt;/a&gt;. My intention was to explain how the system worked by using a video tutorial we had prepared for the event but, to my surprise, we were asked to train our users by using our tools in a run test video. Therefore, I decided to forget about the &lt;a href="http://www.youtube.com/watch?v=5-n4IvmIQfM&amp;amp;feature=youtu.be"&gt;video tutorial&lt;/a&gt; and explain our system by performing an actual search. 
However, since the run test video was very easy to find, my goal was not to be the fastest team in retrieving the video, but I took advantage of all the available time to tell her all the different options and strategies which could be used in our video retrieval system. Eventually, we submitted the right segment which had been displayed on the shared screen. On that screen, in which the target videos were shown, we could also see which was the accumulated score obtained by each team at any instant. Moreover, the keyframes submitted by each team were also displayed and were marked by a red frame if it was a wrong submission and by a green frame if it was a right one. The reason why we were assigned the volunteers at the beginning was that, in this way, the novice user could also learn about the system and get used to it through our searches during the expert-run round. Then, the expert-run round started.&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span lang="en-GB"&gt;The expert-run round was very hard. I remember that sometimes I was still watching the video when some teams had already retrieved it! I knew that some videos consisted of clips which were very similar to each other, such as an interview in a political TV program. Therefore, from my point of view, it was essential to pay attention while the target video was being displayed to get any important detail, any change in the video, any keyframe easier to be retrieved with our system, since the last part of the video could be necessary to distinguish from any other similar segments or to retrieve it in a faster way. Anyway, it seemed not to make any point in comparison with some other teams, which were capable of retrieving the videos in a few seconds, not to say immediately. However, I decided not to throw away all the work done. There was a user who was learning how to use our system and we could retrieve most of the target videos. 
In fact, Camille, who was getting used to the program impressively fast, even helped me&lt;/span&gt;&lt;span lang="en-GB"&gt; in one of the searches&lt;/span&gt;&lt;span lang="en-GB"&gt;. It really was an inflexion point since I thought that we could have a chance during the novice-run round. The expert-run round finished and the winner was &lt;a href="http://www.uni-klu.ac.at/tewi/inf/itec/dms/staff/index.html#Manfred%20_del%20Fabro"&gt;Manfred Del Fabro&lt;/a&gt; from the &lt;a href="http://www.uni-klu.ac.at/main/inhalt/1.htm"&gt;Klagenfurt University&lt;/a&gt; after 8 video searches (there was no time for the 12 scheduled searches). In fact, I couldn't see which was our position in the ranking since the teams&amp;nbsp; weren't sorted out by their scores. However, I think there were 2 teams which achieved a score lower than ours.&lt;/span&gt;&lt;/div&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0cm; text-align: justify;"&gt;&lt;a href="http://3.bp.blogspot.com/-Tm7vtco7c40/Typv4LKd-MI/AAAAAAAAA7o/KwEfw03zSK8/s1600/video-browser-showdown-camille.jpg" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="265" src="http://3.bp.blogspot.com/-Tm7vtco7c40/Typv4LKd-MI/AAAAAAAAA7o/KwEfw03zSK8/s400/video-browser-showdown-camille.jpg" width="400" /&gt;&lt;/a&gt;&lt;span lang="en-GB"&gt;Then, the novice-run round started. Now, the novice users were going to use the systems. After the first search, we directly got the third place. I cheered up Camille for the well-done work. Moreover, I realised that the best teams in the expert-run round were not doing so well at that moment. Maybe their systems were more powerful but more difficult to use for a novice user. After the third or fourth search we reached the top of the ranking. 
It was our first time at the head of the competition. Unfortunately, we went down to the second position after the following search. We were running out of time, so the chairman decided that the sixth search would be the last one and we still had chances to win the competition. Camille found the last target video very quickly so we got an accumulated score of 540 points, which allowed us to reach the head of the competition again. We crossed our fingers since we could only wait for the other teams. Then, the other team with chances of winning the novice-run also retrieved the video, but they were not fast enough and they got an accumulated score of 536 points. Therefore, our expectations had been met. We did win the novice-run round. It was the best present for that &lt;a href="http://en.wikipedia.org/wiki/Biblical_Magi#Spanish_customs"&gt;special date (the Three Wise Men)&lt;/a&gt;.&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span lang="en-GB"&gt;Finally, the two winners were called to get our awards for the best video browser expert-run and novice-run. After receiving the award, many people came to me to know how the system worked and what it was based on. They were surprised how powerful the option for retrieving similar images was by simply selecting the option from a pop-up menu. Furthermore, Camille highlighted that having different tabs summarizing the content of the video according to different criteria was very useful. In addition to this, the use of tabs for the results of the visual searches and to visualize the timeline was also a good decision.&lt;/span&gt;&lt;br /&gt;&lt;span lang="en-GB"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span lang="en-GB"&gt;Then, &lt;a href="http://faculty.eng.fau.edu/omarques/"&gt;Oge Marques&lt;/a&gt;, who is a very nice professor from Florida and loves Football Club Barcelona (he wanted me to play its anthem as I was being asked for the award), took a picture of me, which is shown below. Eventually, I could feel relaxed. 
The competition had been finished. A summary of the tweets written in #mmm2012 can be seen &lt;a href="http://storify.com/timse7/multimedia-modeling-2012"&gt;here&lt;/a&gt;, which includes the congratulations from Oge Marques to the winners of the Video Browser Showdown.&lt;/span&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0cm; text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-DEc1uC4grpE/Twvu_6qO3cI/AAAAAAAAAkM/7b4b7L4ieSc/s1600/diploma.jpeg" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="476" src="http://4.bp.blogspot.com/-DEc1uC4grpE/Twvu_6qO3cI/AAAAAAAAAkM/7b4b7L4ieSc/s640/diploma.jpeg" width="640" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div style="margin-bottom: 0cm; text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-2465489807541549867?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/2465489807541549867/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2012/01/video-browser-showdown-at-multimedia.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/2465489807541549867'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/2465489807541549867'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2012/01/video-browser-showdown-at-multimedia.html' title='Winners of the Novice Run in the Video Browser Showdown #mmm2012'/><author><name>Carles Ventura</name><uri>http://www.blogger.com/profile/13142197003570439057</uri><email>noreply@blogger.com</email><gd:image 
rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://2.bp.blogspot.com/-7ZCoeaFBZUQ/TxQLE6gcm2I/AAAAAAAAA6w/2dUN1x8-L1M/s72-c/IMG_3513.JPG' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-7277040882707748986</id><published>2011-12-23T13:56:00.001+01:00</published><updated>2012-01-12T13:01:01.848+01:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Eli'/><category scheme='http://www.blogger.com/atom/ns#' term='conference'/><category scheme='http://www.blogger.com/atom/ns#' term='xavi'/><category scheme='http://www.blogger.com/atom/ns#' term='thesis'/><category scheme='http://www.blogger.com/atom/ns#' term='user interface'/><category scheme='http://www.blogger.com/atom/ns#' term='annotation'/><title type='text'>The Semantic Shot Annotator at the Muscle Workshop in Pisa</title><content type='html'>&lt;a href="http://3.bp.blogspot.com/-MfML4-CRkWU/TvSIeDwBq0I/AAAAAAAAAw0/6lymyaEZnEM/s1600/PisaTorre.jpg" imageanchor="1" style="clear: right; float: right; margin-bottom: 1em; margin-left: 1em;"&gt;&lt;img border="0" height="320" src="http://3.bp.blogspot.com/-MfML4-CRkWU/TvSIeDwBq0I/AAAAAAAAAw0/6lymyaEZnEM/s320/PisaTorre.jpg" width="238" /&gt;&lt;/a&gt;Most people travel to &lt;a href="http://en.wikipedia.org/wiki/Pisa"&gt;Pisa&lt;/a&gt; (Italy) to visit the world famous Leaning Tower. This local attraction probably sustains most of the economy in the town. The entrance to the tower costs 15 € which is somehow funny, as you can hardly enjoy the inclination from the inside. The locals explain that the origin of the leaning are the grounds, as the town in build on land gained to the Mediterranean Sea. 
In fact, it seems that the neighboring baptistery is also slightly inclined.&lt;br /&gt;&lt;br /&gt;Nevertheless, I did not travel to Pisa for tourism. It has been a constant in the last years for me to travel to Italy to attend conferences, workshops or courses. I can remember going twice to Genoa (ICIP 2005 and SAMT), and also to Sicily (SSMS) and Trento (ICMR). This time, it was a &lt;a href="http://muscle.isti.cnr.it/pisaworkshop2011/"&gt;workshop&lt;/a&gt; organized by &lt;a href="http://muscle.ercim.eu/"&gt;MUSCLE&lt;/a&gt;, a European Network of Excellence sponsored by the European Commission.&lt;br /&gt;&lt;br /&gt;My contribution to this workshop was presenting the results of the bachelor thesis that Elisabet Carcel presented last June, a joint work at the &lt;a href="http://www.upc.edu/"&gt;UPC&lt;/a&gt; and the &lt;a href="http://www.ccma.cat/inici/inici_eng.htm"&gt;Catalan Broadcast Corporation (CCMA)&lt;/a&gt;. She developed a web-based GUI for the semi-automatic annotation of semantic shot types. The system was aimed at assisting the archivists at the CCMA in the annotation of the keyframes which are automatically extracted from the video assets added into the archive. You can find the full details by reading the &lt;a href="http://imatge.upc.edu/%7Exgiro/research/publications/2011/muscle/CarcelMartosGiroMarques.pdf"&gt;pre-print&lt;/a&gt; of the paper, the &lt;a href="http://hdl.handle.net/2099.1/13539"&gt;full thesis&lt;/a&gt; or just browsing through the &lt;a href="http://imatge.upc.edu/%7Exgiro/research/publications/2011/muscle/oral/assets/fallback/index.html"&gt;slides&lt;/a&gt; I presented in Pisa. If your comprehension level of Catalan is good enough you can also watch the oral defense of Eli's thesis in this video (&lt;a href="http://youtu.be/Pv_R-gkEgBA"&gt;first&lt;/a&gt; and &lt;a href="http://youtu.be/bn9_uau1j8c"&gt;second&lt;/a&gt; parts) that was published last week, too. 
Maybe the most intuitive first contact though is just watching the two video demos from the soccer and parliament domain.&lt;br /&gt;&lt;br /&gt;&lt;iframe allowfullscreen="" frameborder="0" height="360" src="http://www.youtube.com/embed/uURx9GoRJBg" width="640"&gt;&lt;/iframe&gt;&lt;br /&gt;&lt;br /&gt;&lt;iframe allowfullscreen="" frameborder="0" height="360" src="http://www.youtube.com/embed/R7o0sCoe-Gs" width="640"&gt;&lt;/iframe&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="http://4.bp.blogspot.com/-gWxNEDALqxY/TvSIYo5Bq_I/AAAAAAAAAws/kjF5Lub_5Vg/s1600/PisaJaime.JPG" imageanchor="1" style="clear: right; float: right; margin-bottom: 1em; margin-left: 1em;"&gt;&lt;img border="0" height="149" src="http://4.bp.blogspot.com/-gWxNEDALqxY/TvSIYo5Bq_I/AAAAAAAAAws/kjF5Lub_5Vg/s200/PisaJaime.JPG" width="200" /&gt;&lt;/a&gt;&lt;br /&gt;The participation of the Image Processing Group of the UPC in the workshop was completed with the work of&lt;a href="http://www.blogger.com/"&gt;&lt;span id="goog_1738075281"&gt;&lt;/span&gt;&lt;/a&gt; &lt;a href="https://sites.google.com/site/jaimegallegovila/"&gt;Jaime Gallego Vila&lt;/a&gt;, Montse Solano and &lt;a href="https://imatge.upc.edu/%7Emontse/"&gt;Montse Pardàs&lt;/a&gt;. Jaime presented the latest results of their object tracking algorithm, based on Gaussian Mixture Model of the object and background visual features. 
Neus and me had worked during &lt;a href="http://hdl.handle.net/2099.1/11710"&gt;Montse Solano's thesis&lt;/a&gt; in the development of a GUI for initializing their tracker and collect their results for an interactive segmentation of video objects.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-7277040882707748986?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/7277040882707748986/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/12/semantic-shot-annotator-at-muscle.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/7277040882707748986'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/7277040882707748986'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/12/semantic-shot-annotator-at-muscle.html' title='The Semantic Shot Annotator at the Muscle Workshop in Pisa'/><author><name>Xavi Giró-i-Nieto</name><uri>https://profiles.google.com/110271653716466055491</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh5.googleusercontent.com/--EaBZ1Bzzsg/AAAAAAAAAAI/AAAAAAAAAwU/4J6xIhZ3Xyo/s512-c/photo.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/-MfML4-CRkWU/TvSIeDwBq0I/AAAAAAAAAw0/6lymyaEZnEM/s72-c/PisaTorre.jpg' height='72' width='72'/><thr:total>0</thr:total><georss:featurename>Pisa, Italy</georss:featurename><georss:point>43.7161354 10.3965843</georss:point><georss:box>43.6243234 10.2386558 43.807947399999996 
10.554512800000001</georss:box></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-5574451357374361320</id><published>2011-11-26T23:16:00.005+01:00</published><updated>2011-12-10T16:05:03.441+01:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='supervised learning'/><category scheme='http://www.blogger.com/atom/ns#' term='Laura'/><category scheme='http://www.blogger.com/atom/ns#' term='classification'/><title type='text'>First contact with Active learning with the Transferable Belief Model for Image Annotation”</title><content type='html'>&lt;style&gt; &lt;!--  /* Font Definitions */ @font-face  {font-family:&lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;Cambria&lt;/span&gt;;  &lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;panose&lt;/span&gt;-1:2 4 5 3 5 4 6 3 2 4;  &lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;mso&lt;/span&gt;-font-&lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;charset&lt;/span&gt;:0;  &lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;mso&lt;/span&gt;-generic-font-family:auto;  &lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;mso&lt;/span&gt;-font-pitch:variable;  &lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;mso&lt;/span&gt;-font-signature:3 0 0 0 1 0;}  /* Style Definitions */ p.&lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;MsoNormal&lt;/span&gt;, &lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;li&lt;/span&gt;.&lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;MsoNormal&lt;/span&gt;, div.&lt;span style="background: none repeat scroll 0% 0% yellow;" 
class="goog-spellcheck-word"&gt;MsoNormal&lt;/span&gt;  {&lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;mso&lt;/span&gt;-style-parent:"";  margin-top:0cm;  margin-right:0cm;  margin-bottom:10.0pt;  margin-left:0cm;  &lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;mso&lt;/span&gt;-pagination:widow-orphan;  font-size:12.0pt;  font-family:"Times New Roman";  &lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;mso&lt;/span&gt;-&lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;ascii&lt;/span&gt;-font-family:&lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;Cambria&lt;/span&gt;;  &lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;mso&lt;/span&gt;-&lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;ascii&lt;/span&gt;-theme-font:minor-&lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;latin&lt;/span&gt;;  &lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;mso&lt;/span&gt;-&lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;fareast&lt;/span&gt;-font-family:&lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;Cambria&lt;/span&gt;;  &lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;mso&lt;/span&gt;-&lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;fareast&lt;/span&gt;-theme-font:minor-&lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;latin&lt;/span&gt;;  &lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;mso&lt;/span&gt;-&lt;span 
style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;hansi&lt;/span&gt;-font-family:&lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;Cambria&lt;/span&gt;;  &lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;mso&lt;/span&gt;-&lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;hansi&lt;/span&gt;-theme-font:minor-&lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;latin&lt;/span&gt;;  &lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;mso&lt;/span&gt;-&lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;bidi&lt;/span&gt;-font-family:"Times New Roman";  &lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;mso&lt;/span&gt;-&lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;bidi&lt;/span&gt;-theme-font:minor-&lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;bidi&lt;/span&gt;;} @page Section1  {size:612.0pt 792.0pt;  margin:72.0pt 90.0pt 72.0pt 90.0pt;  &lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;mso&lt;/span&gt;-header-margin:36.0pt;  &lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;mso&lt;/span&gt;-footer-margin:36.0pt;  &lt;span style="background: none repeat scroll 0% 0% yellow;" class="goog-spellcheck-word"&gt;mso&lt;/span&gt;-paper-source:0;} div.Section1  {page:Section1;} --&gt; &lt;/style&gt;     &lt;br /&gt;&lt;div class="MsoNormal" style="font-family: arial; text-align: justify;"&gt;&lt;span style="font-size: small;"&gt;This February I will go on Erasmus to Grenoble, to the "Grenoble INP", in the &lt;a href="http://www.gipsa-lab.inpg.fr/"&gt;Gipsa Lab 
&lt;/a&gt;(&lt;/span&gt;&lt;span class="noir" style="font-size: small;"&gt;Grenoble&lt;/span&gt;&lt;span style="font-size: small;"&gt; &lt;/span&gt;&lt;span class="cyan" style="font-size: small;"&gt;Images&lt;/span&gt;&lt;span style="font-size: small;"&gt; &lt;/span&gt;&lt;span class="vert" style="font-size: small;"&gt;Parole&lt;/span&gt;&lt;span style="font-size: small;"&gt; &lt;/span&gt;&lt;span class="bleu" style="font-size: small;"&gt;Signal&lt;/span&gt;&lt;span style="font-size: small;"&gt; &lt;/span&gt;&lt;span class="rouge" style="font-size: small;"&gt;Automatique&lt;/span&gt;&lt;span style="font-size: small;"&gt;) in the &lt;i&gt;Image et Signal&lt;/i&gt; department. I have been proposed to work in a project of active learning with &lt;a href="http://www.gipsa-lab.inpg.fr/%7Emichele.rombaut/cv_en.html"&gt;M. Rombaut&lt;/a&gt; and &lt;a href="http://www.gipsa-lab.inpg.fr/%7Edenis.pellerin/cv_en.html"&gt;D. Pellerin&lt;/a&gt;. My first task has been reading the paper &lt;a href="http://hal.archives-ouvertes.fr/docs/00/46/08/86/PDF/WTBFrombaut.pdf"&gt;Multi-labeled image classification by TBM active learning&lt;/a&gt;, presented in the &lt;i&gt;Workshop on Theory of Belief Functions (Belief 2010)&lt;/i&gt;, Brest : France (2010).&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-78908PNqqSE/TuNBQb2jQQI/AAAAAAAAAH0/AlHceQLqcBE/s1600/IMG_0004.PNG" imageanchor="1" style="clear: left; float: left; margin-bottom: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="240" src="http://1.bp.blogspot.com/-78908PNqqSE/TuNBQb2jQQI/AAAAAAAAAH0/AlHceQLqcBE/s320/IMG_0004.PNG" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;span style="font-size: small;"&gt;When I was collaborating with the TSC department I became familiarized with &lt;a href="http://en.wikipedia.org/wiki/Supervised_learning"&gt;supervised learning&lt;/a&gt;, as it was the system used in experiments such as 
&lt;a href="http://bitsearch.blogspot.com/2011/04/multiscale-object-detection-tested-in.html"&gt;Formula 1 publicity&lt;/a&gt;. This article is not about supervised learning, it is about &lt;a href="http://en.wikipedia.org/wiki/Active_learning_%28machine_learning%29"&gt;active learning&lt;/a&gt;, a form of supervised learning that requires the interaction of the user. There are situations when we have some many images to annotate that any help can be useful. For example, in the F1 experiment we annotated the images randomly, but a little help to know which ones were the most useful could have provided better results with less annotated images.&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span lang="EN-GB" style="font-size: small;"&gt;This system is a tool to make user’s work easier. Already labeled images in the database are used to structure the unlabeled ones and propose a suitable class for them. This assistance classification system selects images for the user which are interesting to classify according to a specific strategy and propose a label.&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="font-family: arial; text-align: justify;"&gt;&lt;span lang="EN-GB" style="font-size: small;"&gt; &lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="font-family: arial; text-align: justify;"&gt;&lt;span lang="EN-GB" style="font-size: small;"&gt;The framework is divided in two parts: an automatic part to “model the knowl&lt;/span&gt;&lt;span lang="EN-GB" style="font-size: small;"&gt;e&lt;/span&gt;&lt;span lang="EN-GB" style="font-size: small;"&gt;dge” and another part that requires the interaction of the user. 
Here we have a repres&lt;/span&gt;&lt;span lang="EN-GB" style="font-size: small;"&gt;entat&lt;/span&gt;&lt;span lang="EN-GB" style="font-size: small;"&gt;ion of the &lt;/span&gt;&lt;span lang="EN-GB" style="font-size: small;"&gt;syste&lt;/span&gt;&lt;span lang="EN-GB" style="font-size: small;"&gt;m&lt;/span&gt;&lt;span lang="EN-GB" style="font-size: small;"&gt;, which is &lt;/span&gt;&lt;span lang="EN-GB" style="font-size: small;"&gt;presented as three modules. As we can see, the first two modules are the ones where no interaction of the user is &lt;/span&gt;&lt;span lang="EN-GB" style="font-size: small;"&gt;required, and the ones I will explain.&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="font-family: arial; text-align: justify;"&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;span style="font-size: small;"&gt;&lt;a href="http://1.bp.blogspot.com/-goYZbYOZ5t0/TtFre-t608I/AAAAAAAAAHs/B3XLo-7xzpk/s1600/Modules.png" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img alt="" border="0" id="BLOGGER_PHOTO_ID_5679438785066750914" src="http://1.bp.blogspot.com/-goYZbYOZ5t0/TtFre-t608I/AAAAAAAAAHs/B3XLo-7xzpk/s400/Modules.png" style="float: right; height: 143px; margin: 0pt 0pt 10px 10px; width: 400px;" /&gt;&lt;/a&gt;&lt;/span&gt;&lt;/div&gt;&lt;span lang="EN-GB" style="font-size: small;"&gt;The first step consists in &lt;b&gt;modelling the knowledge&lt;/b&gt; of the labeled images to predict the relevant label of the current unlabeled image. 
The knowledge is modelled using the following techniques.&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="font-family: arial; text-align: justify;"&gt;&lt;span lang="EN-GB" style="font-size: small;"&gt;- &lt;u&gt;Neighbour images&lt;/u&gt; (a single one or K nearest neighbours): If image &lt;span style="font-style: italic;"&gt;u&lt;/span&gt; is close to a classified image (or a set) there is a high belief that it will be labeled the same.&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="font-family: arial; text-align: justify;"&gt;&lt;span lang="EN-GB" style="font-size: small;"&gt;- &lt;u&gt;Knowledge from all classes&lt;/u&gt;: Depending on the semantic interpretation of the images one image can be associated with several non-exclusive classes.&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="font-family: Arial,Helvetica,sans-serif; text-align: justify;"&gt;&lt;span style="font-size: small;"&gt;- &lt;u&gt;Knowledge from all characteristics&lt;/u&gt;:  This method allows the detection of new classes or a new modality of a known class.&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="font-family: arial; text-align: justify;"&gt;&lt;span style="font-family: Arial,Helvetica,sans-serif; font-size: small;"&gt;The second step is the &lt;b&gt;active sampling&lt;/b&gt; module, where&lt;span lang="EN-US" style="line-height: 115%;"&gt; different criteria are presented to select the images that will be labeled by the user:&lt;/span&gt;&lt;/span&gt; &lt;/div&gt;&lt;div class="MsoNormal" style="font-family: arial; text-align: justify;"&gt;&lt;span style="font-size: small;"&gt;- &lt;u&gt;Most positive unlabeled images&lt;/u&gt;: Sometimes named "most relevant". 
Here the system chooses the images that are easy to classify because the visual content is very similar to already labeled images.&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="font-family: arial; text-align: justify;"&gt;&lt;span style="font-size: small;"&gt;- &lt;u&gt;Most ambiguous unlabeled images&lt;/u&gt;: This strategy consists in choosing the unlabeled image which is on the limits of all the known classes. It can also be interesting to select images that are locally most ambiguous, the ones that are on the borders of a certain class.&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="font-family: arial; text-align: justify;"&gt;&lt;span style="font-size: small;"&gt;- &lt;u&gt;Most rejected unlabele&lt;span style="font-family: Arial,Helvetica,sans-serif;"&gt;d image&lt;/span&gt;&lt;/u&gt;&lt;/span&gt;&lt;span style="font-family: Arial,Helvetica,sans-serif; font-size: small;"&gt;: &lt;span lang="EN-US" style="line-height: 115%;"&gt;The selected images are the unlabeled&lt;/span&gt; ones tha&lt;/span&gt;&lt;span style="font-size: small;"&gt;t do not correspond to any class. 
They have been classified as not belonging to any of the already existing classes, so this can be useful to create a new class or correct an already existing class.&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="font-family: arial; text-align: justify;"&gt;&lt;span style="font-size: small;"&gt;- &lt;u&gt;Most conflicted unlabeled image&lt;/u&gt;: The information fusion with all characteristics can lead to a conflict about the inclusion in one or more classes, so they might not correspond to current known classes.&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="font-family: arial; text-align: justify;"&gt;&lt;span style="font-size: small;"&gt;- &lt;u&gt;Most uncertain unlabeled image&lt;/u&gt;: The two hypotheses (belonging or not belonging to the class) have similar probabilities, so it is impossible to distinguish one hypothesis from the other.&lt;/span&gt;&lt;/div&gt;&lt;div class="MsoNormal" style="text-align: justify;"&gt;&lt;span style="font-family: arial; font-size: small;"&gt;The third step is the final classification of the selected images.&lt;/span&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-5574451357374361320?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/5574451357374361320/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/11/first-contact-with-multi-labeled-image.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/5574451357374361320'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/5574451357374361320'/><link rel='alternate' type='text/html' 
href='http://bitsearch.blogspot.com/2011/11/first-contact-with-multi-labeled-image.html' title='First contact with Active learning with the Transferable Belief Model for Image Annotation”'/><author><name>Laura</name><uri>http://www.blogger.com/profile/12096894187253130660</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://1.bp.blogspot.com/-78908PNqqSE/TuNBQb2jQQI/AAAAAAAAAH0/AlHceQLqcBE/s72-c/IMG_0004.PNG' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-9126464947924245776</id><published>2011-11-17T16:00:00.000+01:00</published><updated>2011-11-23T12:50:47.078+01:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Manel'/><category scheme='http://www.blogger.com/atom/ns#' term='navigation'/><category scheme='http://www.blogger.com/atom/ns#' term='GOS'/><title type='text'>User navigation through a tree-based visual index</title><content type='html'>&lt;br /&gt;&lt;div style="text-align: justify;"&gt;As a result of the &lt;a href="http://bitsearch.blogspot.com/2011/07/gos-using-several-search-engines.html"&gt;integration &lt;/a&gt;of HCT indexing systems, &lt;a href="http://upseek.upc.edu/gos/"&gt;GOS &lt;/a&gt;can give now a solution to the visual navigation of large databases.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;The indexing engine stores it results as a hierarchy of directories in the file system where every node of the tree-structured index is described by an MPEG-7/XML file. 
Each of these files defines the structure of the node with a list of child nodes and the sub-tree visual representation.&lt;/div&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-g_ZL34S2n_U/TsUaGQmhBZI/AAAAAAAAAMM/fonNMWFVpJI/s1600/20111117+-+HCTNavigation.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="237" src="http://4.bp.blogspot.com/-g_ZL34S2n_U/TsUaGQmhBZI/AAAAAAAAAMM/fonNMWFVpJI/s320/20111117+-+HCTNavigation.png" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div style="text-align: justify;"&gt;Browsing through visual trees is really easy with GOS. Once the index file has been loaded into memory from its root, the user can go down to different nodes. This figure shows the root of a tree-structured index of 200,000 images with five children. Double clicking on the chosen sub-tree representation thumbnail, the system will load the XML file and display its child nodes. In the last level of the tree, a black frame indicates to the user that it is a leaf.&lt;/div&gt;&lt;br /&gt;&lt;a href="http://3.bp.blogspot.com/-lrboJSciyYg/TsUaJNBMOdI/AAAAAAAAAMU/_Cv-dJCj2Cs/s1600/20111117+-+HCTNavigation+2.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em; text-align: center;"&gt;&lt;img border="0" height="428" src="http://3.bp.blogspot.com/-lrboJSciyYg/TsUaJNBMOdI/AAAAAAAAAMU/_Cv-dJCj2Cs/s640/20111117+-+HCTNavigation+2.png" width="640" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;div style="text-align: justify;"&gt;Users also have the opportunity to climb the tree using the arrow icon located in the first position of the thumbnail grid. 
A simple click allows the user to correct their previous navigation action.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-9126464947924245776?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/9126464947924245776/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/11/gos-indexed-database-explorer.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/9126464947924245776'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/9126464947924245776'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/11/gos-indexed-database-explorer.html' title='User navigation through a tree-based visual index'/><author><name>Manel</name><uri>http://www.blogger.com/profile/13861846623917028915</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/-g_ZL34S2n_U/TsUaGQmhBZI/AAAAAAAAAMM/fonNMWFVpJI/s72-c/20111117+-+HCTNavigation.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-2794634453568829901</id><published>2011-10-24T14:12:00.001+02:00</published><updated>2011-10-24T14:23:05.309+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='teaching'/><category scheme='http://www.blogger.com/atom/ns#' term='xavi'/><title type='text'>Students pick full screen multiplayer as the hottest topic of the 
semester</title><content type='html'>A couple of years ago I started a teaching activity where students would share an online reading extracted from the news or a technical blog. All proposed readings were qualified by the class, providing a ranking of the most appealing articles according to my students. &lt;a href="http://bitsearch.blogspot.com/2009/11/hot-tech-topics-according-to-my.html"&gt;Three years ago&lt;/a&gt; the top of the list was for an article about holographic videoconferencing and augmented reality on mobile devices. This year the most valued topics in the course &lt;i&gt;Audiovisual Communications&lt;/i&gt; were related to reproduction systems and augmented reality.&lt;br /&gt;&lt;br /&gt;The &lt;a href="http://www.telegraph.co.uk/technology/video-games/8737866/LG-introduces-dual-view-televisions-for-videogamers.html"&gt;favorite reading&lt;/a&gt; this semester was the announcement from LG of a new display that exploits the 3D TV technology for a completely new purpose: full screen multiplayer gaming. Instead of providing different images to every eye to generate a depth perception, why not send completely different images to every viewer? The proposed application would be those video games where different players are virtually located in different spaces. So far, the classic solution was to split the screen in as many parts as players, but with the dual view technology this is no longer necessary, giving a full 2D image to every player. I guess that with the addition of two sets of headphones and DTV receivers, the same philosophy could apply to watching two different TV stations simultaneously. No more discussion at home about soccer or movie night!&lt;br /&gt;&lt;br /&gt;&lt;iframe allowfullscreen="" frameborder="0" height="360" src="http://www.youtube.com/embed/SjFtNsjrZZs" width="640"&gt;&lt;/iframe&gt;&lt;br /&gt;&lt;br /&gt;Curiously the second and third most voted articles were very similar. 
Both of them are mobile apps that recognize a flat object (text or an image) and use them to perform some analysis whose result is embedded on the screen using the augmented reality paradigm. A first example is the &lt;a href="http://singularityhub.com/2010/12/18/word-lens-translates-the-text-you-see-in-real-time-amazing-to-behold-video/"&gt;Word Lens&lt;/a&gt; app, which detects text on screen, recognized through an OCR algorithm and translates it. So far, nothing really special, but the innovation comes from the generation of a new graphical layer with a similar font and color that replaces the real text on the screen. The student that proposed the reading tested the app and she admitted being impressed by the accuracy of the application.&lt;br /&gt;&lt;br /&gt;&lt;object height="360" width="640"&gt;&lt;param name="movie" value="http://www.youtube.com/v/h2OfQdYrHRs&amp;rel=0&amp;hl=en_US&amp;feature=player_embedded&amp;version=3"&gt;&lt;/param&gt;&lt;param name="allowFullScreen" value="true"&gt;&lt;/param&gt;&lt;param name="allowScriptAccess" value="always"&gt;&lt;/param&gt;&lt;embed src="http://www.youtube.com/v/h2OfQdYrHRs&amp;rel=0&amp;hl=en_US&amp;feature=player_embedded&amp;version=3" type="application/x-shockwave-flash" allowfullscreen="true" allowScriptAccess="always" width="640" height="360"&gt;&lt;/embed&gt;&lt;/object&gt;&lt;br /&gt;&lt;br /&gt;The third position was for an &lt;a href="http://www.nytimes.com/2011/04/08/technology/08reality.html?_r=2&amp;amp;scp=1&amp;amp;sq=recognizer&amp;amp;st=Search"&gt;article&lt;/a&gt; emulating the moving images in Harry Potter's Daily Prophet. The demo app in this case takes a snapshot of a picture on a newspaper and uses it to retrieve from the Internet a video that is embedded just in the location of the image. The proposed application was to provide an additional media linked to the newspapers, or wall ads on the street providing animated versions of them. 
It looks fun, but I am not sure about how practical this application is. This software is included in the &lt;a href="http://www.aurasma.com/"&gt;Aurasma&lt;/a&gt; library of the British company Autonomy.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;iframe allowfullscreen="" frameborder="0" height="360" src="http://www.youtube.com/embed/GBKy-hSedg8" width="640"&gt;&lt;/iframe&gt;&lt;br /&gt;&lt;br /&gt;Other celebrated readings referred to &lt;a href="http://web.mit.edu/newsoffice/2011/video-holography-0124.html"&gt;holographic TV&lt;/a&gt;, the &lt;a href="http://www.feedspew.com/article/siri-voice-recognition-arrives-on-the-iphone-4s-436930"&gt;Siri&lt;/a&gt; assistant for the recent iPhone 4S and a wirelessly controlled &lt;a href="http://www.augmentedplanet.com/2011/09/ar-drone-hits-android/"&gt;mini-helicopter&lt;/a&gt; equipped with cameras.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-2794634453568829901?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/2794634453568829901/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/10/students-pick-full-screen-multipayer-as.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/2794634453568829901'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/2794634453568829901'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/10/students-pick-full-screen-multipayer-as.html' title='Students pick full screen multiplayer as the hottest topic of the semester'/><author><name>Xavi 
Giró-i-Nieto</name><uri>https://profiles.google.com/110271653716466055491</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh5.googleusercontent.com/--EaBZ1Bzzsg/AAAAAAAAAAI/AAAAAAAAAwU/4J6xIhZ3Xyo/s512-c/photo.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://img.youtube.com/vi/SjFtNsjrZZs/default.jpg' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-7292116206236531348</id><published>2011-09-27T11:30:00.000+02:00</published><updated>2011-09-29T16:56:29.513+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Manel'/><category scheme='http://www.blogger.com/atom/ns#' term='query by image'/><category scheme='http://www.blogger.com/atom/ns#' term='GOS'/><title type='text'>Management of index servers from GOS</title><content type='html'>&lt;br /&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Trebuchet MS', sans-serif;"&gt;The first system for database indexing is already built into GOS latest version to execute faster queries.&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Trebuchet MS', sans-serif;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Trebuchet MS', sans-serif;"&gt;With the new set of tools for indexing, the user (developer profile) can run faster queries over HCT tools and create indexes as well. 
There are two running modes selectable in the preferences panel according to whether the index server is handled internally or externally from GOS:&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-Melp85SrRJg/ToDlIKT-gwI/AAAAAAAAALw/zAPX-MhWUn0/s1600/20110926-preferences.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="265" src="http://2.bp.blogspot.com/-Melp85SrRJg/ToDlIKT-gwI/AAAAAAAAALw/zAPX-MhWUn0/s320/20110926-preferences.png" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Trebuchet MS', sans-serif;"&gt;&lt;b&gt;External&lt;/b&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Trebuchet MS', sans-serif;"&gt;&lt;b&gt;&amp;nbsp;execution (expert user)&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-jRZNZPyrS-U/ToDl2r28t8I/AAAAAAAAAL4/CCjnvC7qz7g/s1600/GOShctmemquerysys.png" imageanchor="1" style="clear: right; float: right; margin-bottom: 1em; margin-left: 1em;"&gt;&lt;img border="0" height="150" src="http://1.bp.blogspot.com/-jRZNZPyrS-U/ToDl2r28t8I/AAAAAAAAAL4/CCjnvC7qz7g/s200/GOShctmemquerysys.png" width="200" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Trebuchet MS', sans-serif;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Trebuchet MS', sans-serif;"&gt;That is a simple implementation for those users who are able to run the indexing tools without using GOS. It will be used only as a search engine (query_request tool). 
In this case it has to be specified in which host:port your KSC server is executed and the index builder tool (hct_building)&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Trebuchet MS', sans-serif;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Trebuchet MS', sans-serif;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Trebuchet MS', sans-serif;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Trebuchet MS', sans-serif;"&gt;&lt;b&gt;Internal&lt;/b&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Trebuchet MS', sans-serif;"&gt;&lt;b&gt;&amp;nbsp;execution (medium user)&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Trebuchet MS', sans-serif;"&gt;&lt;b&gt;&lt;br /&gt;&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Trebuchet MS', sans-serif;"&gt;In this mode, GOS launches all tools&amp;nbsp;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Trebuchet MS', sans-serif;"&gt;involved&amp;nbsp;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: 'Trebuchet MS', sans-serif;"&gt;in the indexing process. The KSC server is executed on the port selected in the preferences panel. Then it loads the index file that the user has selected from the search space panel. 
Such indexes must have been previously created, for example, with the index creation interface also developed in GOS.&lt;/span&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Trebuchet MS', sans-serif;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: center;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Trebuchet MS', sans-serif;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Times New Roman';"&gt;&lt;a href="http://1.bp.blogspot.com/-NeMH8r_Cny8/ToDlKIxDRqI/AAAAAAAAAL0/tQROoB-r-0M/s1600/20110926-indexdialogfinal.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="252" src="http://1.bp.blogspot.com/-NeMH8r_Cny8/ToDlKIxDRqI/AAAAAAAAAL0/tQROoB-r-0M/s320/20110926-indexdialogfinal.png" width="320" /&gt;&lt;/a&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Trebuchet MS', sans-serif;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="font-family: 'Trebuchet MS', sans-serif;"&gt;This mode allows users to create their own indexes by opening a simple dialog. Through the dialog pages the user selects which image descriptors collections wants to add to the index and the visual descriptor index type. 
If more than one type is selected, GOS creates the same number of different indexes.&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-7292116206236531348?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/7292116206236531348/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/09/gos-visual-indexing-ui.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/7292116206236531348'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/7292116206236531348'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/09/gos-visual-indexing-ui.html' title='Management of index servers from GOS'/><author><name>Manel</name><uri>http://www.blogger.com/profile/13861846623917028915</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://2.bp.blogspot.com/-Melp85SrRJg/ToDlIKT-gwI/AAAAAAAAALw/zAPX-MhWUn0/s72-c/20110926-preferences.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-4713016908908334055</id><published>2011-09-26T10:33:00.002+02:00</published><updated>2011-09-26T10:51:29.779+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='object retrieval'/><category scheme='http://www.blogger.com/atom/ns#' term='fusion'/><category scheme='http://www.blogger.com/atom/ns#' term='xavi'/><title type='text'>Fitting 
of Region Non-Matches to the Weibull Distribution</title><content type='html'>&lt;br /&gt;A couple of months ago I explained &lt;a href="http://bitsearch.blogspot.com/2011/07/normalization-of-visual-distances-for.html"&gt;one strategy&lt;/a&gt; I had designed to fuse the similarity scores obtained when comparing two regions in terms of shape, color and texture. At that time, the proposed approach considered an exponential curve whose shape was determined by a parameter &lt;i&gt;beta&lt;/i&gt;. That solution though was based on an intuition that missed a firm statistical background.&lt;br /&gt;Last month both Phd advisors agreed that this assumption should sustain on better grounds. In particular, they suggested me to base this normalization curve on a widely agreed probability distribution, such as the one proposed last year by Scheirer, Rocha, Michaels and Boult in their work &lt;a href="http://www.springerlink.com/content/h847715301284284/"&gt;"Robust Fusion: Extreme Value Theory for Recognition Score Normalization"&lt;/a&gt; (ECCV 2010).&lt;br /&gt;&lt;br /&gt;&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-5Bjc4m5dw9o/Tn9sQ0QfTxI/AAAAAAAAAvM/ebu2p_xq_90/s1600/FusionEVT.png" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="142" src="http://1.bp.blogspot.com/-5Bjc4m5dw9o/Tn9sQ0QfTxI/AAAAAAAAAvM/ebu2p_xq_90/s400/FusionEVT.png" width="400" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Scheme proposed by Scheirer et al (ECCV 2010)&lt;/td&gt;&lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;&lt;style type="text/css"&gt; &lt;/style&gt;&lt;br /&gt;&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="float: right; text-align: 
right;"&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-138VK0VcQWg/Tn9sT6ZY7SI/AAAAAAAAAvU/6M17Icy76SY/s1600/1000px-Weibull_PDF.svg.png" imageanchor="1" style="clear: right; margin-bottom: 1em; margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="200" src="http://2.bp.blogspot.com/-138VK0VcQWg/Tn9sT6ZY7SI/AAAAAAAAAvU/6M17Icy76SY/s200/1000px-Weibull_PDF.svg.png" width="200" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Weibull PDF&lt;/td&gt;&lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;style type="text/css"&gt;p, li { white-space: pre-wrap; }&lt;/style&gt;Their proposal sustain on the &lt;a href="http://en.wikipedia.org/wiki/Extreme_value_theory"&gt;Extreme Value Theory&lt;/a&gt;, which basically provides an alternative to the &lt;a href="http://en.wikipedia.org/wiki/Central_limit_theorem"&gt;Central Limit Theorem&lt;/a&gt; when, instead of focusing on the median values, the interest relies on the extreme values of a distribution. This approach matches the classical visual retrieval problem, where the data distribution refers to the similarity scores associated to the hits that conform a ranked list. 
The Extreme Value Theory applied to the retrieval case states that these scores will follow a &lt;a href="http://en.wikipedia.org/wiki/Weibull_distribution"&gt;Weibull distribution&lt;/a&gt;, a curve that is determined by two parameters &lt;i&gt;k&lt;/i&gt; and &lt;i&gt;lambda&lt;/i&gt;.&lt;br /&gt;&lt;br /&gt;&lt;table cellpadding="0" cellspacing="0" class="tr-caption-container" style="float: left; text-align: left;"&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-gyAvXtQjHNM/Tn9sTtSU_II/AAAAAAAAAvQ/pJ3Uf-7FX5c/s1600/1000px-Weibull_CDF.svg.png" imageanchor="1" style="clear: left; margin-bottom: 1em; margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="200" src="http://2.bp.blogspot.com/-gyAvXtQjHNM/Tn9sTtSU_II/AAAAAAAAAvQ/pJ3Uf-7FX5c/s200/1000px-Weibull_CDF.svg.png" width="200" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Weibull CDF&lt;/td&gt;&lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;The basic idea of Scheirer et al. is to fuse the similarity scores obtained with different features by previously normalizing them through the &lt;a href="http://en.wikipedia.org/wiki/Cumulative_distribution_function"&gt;Cumulative Distribution Function (CDF)&lt;/a&gt; of the Weibull distribution, a transformation that will map any score to a comparable range from 0 to 1. I was glad to see that, for certain combinations of the Weibull parameters, the resulting CDF curve is in fact pretty similar to the exponential I had designed, an observation that supports the assumption that the Weibull distribution can provide good results in this context.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;style type="text/css"&gt; &lt;/style&gt;&lt;br /&gt;&lt;style type="text/css"&gt;p, li { white-space: pre-wrap; }&lt;/style&gt;Scheirer et al. provide a valuable insight on how to use Weibull in the visual retrieval scenario. 
They noticed that, given a query, it is common to obtain a much larger number of samples that do not match the query (irrelevant) than those that do match the query (relevant). Moreover, the probability distribution functions for match and non-match hits will probably be different. So they propose to focus on the non-match hits, learn their Weibull parameters and use the CDF of this non-match dataset to normalize all scores. And this is exactly the approach I report about in this post.&lt;br /&gt;&lt;br /&gt;I am testing the proposed scheme to combine the similarity scores of color, texture and shape when assessing the similarity between two regions in an object retrieval problem. The first step I took was to visualize the histograms of both relevant and non-relevant hits for each type of visual feature and decide whether the data distribution I deal with may match a Weibull one. The obtained results are shown below for one of the train dataset splits defined over the &lt;a href="http://bitsearch.blogspot.com/2011/08/adaptation-of-ethz-shape-database-to.html"&gt;ETHZ dataset&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-ueY1YYEgPbU/Tn-KiI94b8I/AAAAAAAAAvY/XQ21BbYf7IA/s1600/histograms.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="280" src="http://4.bp.blogspot.com/-ueY1YYEgPbU/Tn-KiI94b8I/AAAAAAAAAvY/XQ21BbYf7IA/s320/histograms.png" width="320" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Score histograms for relevant and non-relevant hits&lt;/td&gt;&lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;The first observation is that indeed non-match distributions, on the right, are less noisy than the match ones because 
many more samples are available. Secondly, every visual feature presents different distribution shapes, so it makes lots of sense to estimate some parameters to fit a generic curve to every specific case. In some cases there are peak values at the highest and lowest bin, which basically indicates the behavior of a distance metric that collapses all distances over/below a certain threshold in the same value. Finally, there is a difference in the EVT case and the data I am displaying. Every obtained score corresponds to the best match between the query region and all the possible regions that are defined in an image, 99 in these experiments. So the results shown are already somehow extreme values. I have my doubts whether I should now only consider the top N non-match hits among this best intra-image matches, or just use all data to try to match the Weibull distribution. Given that the range of score values for relevant matches is similar to the ones for irrelevant, I decided to use the complete irrelevant histograms to be match to the Weibull.&lt;br /&gt;&lt;br /&gt;So the next step to solve was how to estimate the parameters of the Weibull distribution to match its Probability Density Function (PDF) to the obtained histograms. I follow a &lt;a href="http://www.weibull.com/LifeDataWeb/estimation_of_the_weibull_parameter.htm#rry"&gt;solution&lt;/a&gt; based on least squares. 
Firstly, the Weibull CDF parametrized with &lt;i&gt;k&lt;/i&gt; and &lt;i&gt;lambda&lt;/i&gt; is operated to obtain a linear expression of the form &lt;i&gt;y=bx+a&lt;/i&gt;.&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-a3Ayk42R8mg/Tn-kqCku3iI/AAAAAAAAAvg/ygFlN0Q0bv0/s1600/Linealization.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="231" src="http://2.bp.blogspot.com/-a3Ayk42R8mg/Tn-kqCku3iI/AAAAAAAAAvg/ygFlN0Q0bv0/s320/Linealization.png" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;Afterwards, the &lt;a href="http://en.wikipedia.org/wiki/Least_squares"&gt;least squares&lt;/a&gt; estimation is applied to obtain the estimations of the &lt;i&gt;k&lt;/i&gt; and &lt;i&gt;lambda&lt;/i&gt; parameters that define the Weibull distribution.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://1.bp.blogspot.com/-S92aSQQM0Gk/Tn-kpHcMSxI/AAAAAAAAAvc/fxrX9vab2hc/s1600/LeastSquares.png" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="185" src="http://1.bp.blogspot.com/-S92aSQQM0Gk/Tn-kpHcMSxI/AAAAAAAAAvc/fxrX9vab2hc/s320/LeastSquares.png" width="320" /&gt;&lt;/a&gt; &lt;br /&gt;&lt;br /&gt;&lt;br /&gt;I implemented the whole estimation in Java and generated some graphs that let me debug the complete process. 
The next figure compares the actual data histogram with the synthesized PDF.&lt;br /&gt;&lt;br /&gt;&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-_QipVyU-3AY/ToA2CVFN6ZI/AAAAAAAAAvo/RNSB4YWZVqU/s1600/synthPDF.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="288" src="http://3.bp.blogspot.com/-_QipVyU-3AY/ToA2CVFN6ZI/AAAAAAAAAvo/RNSB4YWZVqU/s320/synthPDF.png" width="320" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Actual and Weibull synthesized PDFs&lt;/td&gt;&lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;&lt;br /&gt;This second figure compares the actual CDF with the one generated with the estimated &lt;i&gt;k&lt;/i&gt; and &lt;i&gt;lambda&lt;/i&gt;.&lt;br /&gt;&lt;table align="center" cellpadding="0" cellspacing="0" class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;"&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-8_CSHSmByOg/ToA2BPycEiI/AAAAAAAAAvk/q0PYy5Uhbh0/s1600/synthCDF.png" style="margin-left: auto; margin-right: auto;"&gt;&lt;img border="0" height="304" src="http://2.bp.blogspot.com/-8_CSHSmByOg/ToA2BPycEiI/AAAAAAAAAvk/q0PYy5Uhbh0/s320/synthCDF.png" width="320" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Actual and Weibull synthesized CDFs&lt;/td&gt;&lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;In general, the obtained curves seem to fit properly to the actual data distribution, so the obtained CDF can be effectively used to normalized the similarity scores for every visual feature. 
The next step will be to compare the retrieval results when fusing features with these normalization strategy to other option that I have been developing during the last months.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-4713016908908334055?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/4713016908908334055/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/09/fitting-of-region-non-matches-to.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/4713016908908334055'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/4713016908908334055'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/09/fitting-of-region-non-matches-to.html' title='Fitting of Region Non-Matches to the Weibull Distribution'/><author><name>Xavi Giró-i-Nieto</name><uri>https://profiles.google.com/110271653716466055491</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh5.googleusercontent.com/--EaBZ1Bzzsg/AAAAAAAAAAI/AAAAAAAAAwU/4J6xIhZ3Xyo/s512-c/photo.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://1.bp.blogspot.com/-5Bjc4m5dw9o/Tn9sQ0QfTxI/AAAAAAAAAvM/ebu2p_xq_90/s72-c/FusionEVT.png' height='72' width='72'/><thr:total>0</thr:total><georss:featurename>Badalona, Catalonia</georss:featurename><georss:point>41.450137 2.2474195</georss:point><georss:box>41.4025305 2.1684555 41.4977435 
2.3263835</georss:box></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-7047118802583976129</id><published>2011-08-12T14:17:00.008+02:00</published><updated>2011-08-17T12:27:59.728+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Manel'/><category scheme='http://www.blogger.com/atom/ns#' term='query by image'/><category scheme='http://www.blogger.com/atom/ns#' term='GOS'/><title type='text'>GOS Visual Search on Precomputed Indexes</title><content type='html'>&lt;div style="text-align: center;"&gt;&lt;span class="Apple-style-span"&gt;&lt;u&gt;&lt;br /&gt;&lt;/u&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;div style="text-align: justify;"&gt;&lt;a href="http://bitsearch.blogspot.com/2011/07/gos-using-several-search-engines.html"&gt;In my previous&lt;/a&gt; post it was said that GOS is ready to support different architectures to exploit the engine for visual indexing. As mentioned before, the memory execution mode has been published for developers of the image group. 
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;In this mode the user is an expert in the field (HCT) and the indexing service is manually executed from a terminal and accessed by GOS by setting up the&lt;i&gt; KSC&lt;/i&gt; server to query an image with &lt;i&gt;query_request&lt;/i&gt;.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;a href="http://3.bp.blogspot.com/-SQ8lgVZ-nlM/TkUfAWqiHtI/AAAAAAAAALU/FL5SlSccdyg/s1600/GOShctmemquerysys.png" onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}"&gt;&lt;img src="http://3.bp.blogspot.com/-SQ8lgVZ-nlM/TkUfAWqiHtI/AAAAAAAAALU/FL5SlSccdyg/s400/GOShctmemquerysys.png" border="0" alt="" id="BLOGGER_PHOTO_ID_5639948199295852242" style="display: block; margin-top: 0px; margin-right: auto; margin-bottom: 10px; margin-left: auto; text-align: center; cursor: pointer; width: 400px; height: 301px; " /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;To keep improving, we wondered how to give users freedom to create and manage their own indexes separating user's images by domain, the image source, ultimately, different datasets  the user wants to create. To do so, Carles Ventura has developed a very interesting tool,the &lt;i&gt;database_indexing&lt;/i&gt;. It runs an hct tree to be stored on in a text file on the hard disk.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;With the help of this binary, GOS is not only capable to perform hct queries(for developers) but also can read indexes previously generated with it and thus achieve greater freedom to search for all kind of users. 
With the option of reading previously created indexes from disk we significantly reduce the initialization time of the indexing service, as it is no longer necessary to create the tree from scratch in every execution; it is enough to read the saved index from a file.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;a href="http://1.bp.blogspot.com/-Kh1BP2HtVi4/TkUfSkbfVdI/AAAAAAAAALc/iJHCT36Hs60/s1600/GOShctindexcreation.png" onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}"&gt;&lt;img src="http://1.bp.blogspot.com/-Kh1BP2HtVi4/TkUfSkbfVdI/AAAAAAAAALc/iJHCT36Hs60/s400/GOShctindexcreation.png" border="0" alt="" id="BLOGGER_PHOTO_ID_5639948512228496850" style="display: block; margin-top: 0px; margin-right: auto; margin-bottom: 10px; margin-left: auto; text-align: center; cursor: pointer; width: 400px; height: 230px; " /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;To sum up, the features supported by GOS have increased with the possibility of launching visual searches on indexed datasets. 
A single click internally handles a query by executing the &lt;i&gt;KSC&lt;/i&gt; message manager, the &lt;i&gt;hct_building&lt;/i&gt; tool and querying with the &lt;i&gt;query_request&lt;/i&gt; tool in a way completely transparent for the user.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;a href="http://1.bp.blogspot.com/-_MyYLHaDBfE/TkUfg7NSxkI/AAAAAAAAALk/V_g6LKUDq8k/s1600/GOShctindquery.png" onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}"&gt;&lt;img src="http://1.bp.blogspot.com/-_MyYLHaDBfE/TkUfg7NSxkI/AAAAAAAAALk/V_g6LKUDq8k/s400/GOShctindquery.png" border="0" alt="" id="BLOGGER_PHOTO_ID_5639948758861137474" style="display: block; margin-top: 0px; margin-right: auto; margin-bottom: 10px; margin-left: auto; text-align: center; cursor: pointer; width: 400px; height: 214px; " /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;Should be noted that the user can perform multiple queries in one GOS session because it keeps the KSC and hct_building threads active listening for new query_request connections. 
&lt;/div&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-7047118802583976129?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/7047118802583976129/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/08/integration-of-visual-indexing-engine.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/7047118802583976129'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/7047118802583976129'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/08/integration-of-visual-indexing-engine.html' title='GOS Visual Search on Precomputed Indexes'/><author><name>Manel</name><uri>http://www.blogger.com/profile/13861846623917028915</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/-SQ8lgVZ-nlM/TkUfAWqiHtI/AAAAAAAAALU/FL5SlSccdyg/s72-c/GOShctmemquerysys.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-4729217794889846756</id><published>2011-08-02T17:37:00.008+02:00</published><updated>2011-08-04T19:46:34.465+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Monica'/><category scheme='http://www.blogger.com/atom/ns#' term='text'/><title type='text'>Evaluation of tags suggested from a video asset</title><content type='html'>&lt;div style="text-align: 
justify;"&gt;&lt;span style="" lang="EN-US"&gt;During this month I implemented a basic tag suggestion. The first task was a system that suggests tags from an image. I explained how it works in this &lt;a href="http://bitsearch.blogspot.com/2011/07/tag-recomendation-from-image.html"&gt;post&lt;/a&gt; a few weeks ago. Since we work with assets and the textual metadata are annotated at the level of asset, the second task was to suggest tags from assets.&lt;/span&gt;&lt;span style="" lang="EN-US"&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold; color: rgb(51, 204, 255);"&gt;Tag suggested from assets&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;Every video asset in the system contains a collection of automatically extracted keyframes that provide a rough overview of its contents. For every &lt;/span&gt;&lt;span style="" lang="EN-US"&gt;keyframe of an asset, a list &lt;/span&gt;&lt;span style="" lang="EN-US"&gt;of tags is suggested in the same way as the first task. To obtain a result at the level of asset, the system combines the suggestions obtained at the level of keyframe. This combination is based on the sum of the scores of the tags.&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-dcd2rAjnF7A/TjgaKtGj6HI/AAAAAAAAAM8/1JiXQ6em6cQ/s1600/asset_tags.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 400px; height: 159px;" src="http://4.bp.blogspot.com/-dcd2rAjnF7A/TjgaKtGj6HI/AAAAAAAAAM8/1JiXQ6em6cQ/s400/asset_tags.png" alt="" id="BLOGGER_PHOTO_ID_5636283704862304370" border="0" /&gt;&lt;/a&gt;&lt;span style="font-weight: bold;"&gt;Dataset&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;&lt;/span&gt;&lt;span style="" lang="EN-US"&gt;&lt;br /&gt;I created a database with a few assets of different topics in order to train the system. 
In particular I focused in the topics of Football, Table Tennis, Formula 1 and Parliament. &lt;/span&gt;Furthermore, I chose another group of assets that had the same topics that were in the dataset for testing the system.&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;Measures &lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="" lang="EN-US"&gt;The results have been evaluated in term of &lt;a href="http://bitsearch.blogspot.com/2010/03/retrieval-systems-evaluation.html"&gt;Average Precision&lt;/a&gt; at the level of assets. Also, the Mean Average Precision allows us to see if there is any topic easier to suggest than others.&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;Results&lt;/span&gt;&lt;span style="" lang="EN-US"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-wjHvmsF_DUc/TjgaKb4ELRI/AAAAAAAAAM0/AAPql6KRaew/s1600/results_tags.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 400px; height: 194px;" src="http://4.bp.blogspot.com/-wjHvmsF_DUc/TjgaKb4ELRI/AAAAAAAAAM0/AAPql6KRaew/s400/results_tags.png" alt="" id="BLOGGER_PHOTO_ID_5636283700238101778" border="0" /&gt;&lt;/a&gt;&lt;/span&gt;&lt;span style="font-weight: bold;"&gt;Conclusions&lt;/span&gt;&lt;span style="" lang="EN-US"&gt;&lt;/span&gt;&lt;span style="" lang="EN-US"&gt;&lt;br /&gt;&lt;/span&gt;&lt;ul&gt;&lt;li&gt;&lt;span style="" lang="EN-US"&gt;The low precision in Tennis Table is due to the system suggests tags of the topics: Football and Formula 1. This is because the visual similarity between the assets for testing and training are lower than Football and Formula 1 ones.&lt;br /&gt;&lt;/span&gt;&lt;/li&gt;&lt;/ul&gt;&lt;ul&gt;&lt;li&gt;&lt;span style="" lang="EN-US"&gt;In order to evaluate properly the topic of Formula 1 we should test with more assets. The results are based on one visual descriptor, MPEG-7 Color Layout. 
Maybe we'll obtain better results if we use other visual descriptors or a combination of them. Another solution is improving the system through object detection systems.&lt;br /&gt;&lt;/span&gt;&lt;/li&gt;&lt;/ul&gt;&lt;ul&gt;&lt;li&gt;&lt;span style="" lang="EN-US"&gt;The best results are shown in the Parliament and Football topics. These results arise because the keyframes of these topics are very similar. Moreover, in the case of Parliament, a large part of their tags appear in all assets, so it is easier to suggest tags.&lt;/span&gt;&lt;/li&gt;&lt;/ul&gt;&lt;span style="" lang="EN-US"&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-4729217794889846756?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/4729217794889846756/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/08/evaluation-of-tags-suggested-from-asset.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/4729217794889846756'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/4729217794889846756'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/08/evaluation-of-tags-suggested-from-asset.html' title='Evaluation of tags suggested from a video asset'/><author><name>Monica</name><uri>http://www.blogger.com/profile/04558100039934227329</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' 
url='http://4.bp.blogspot.com/-dcd2rAjnF7A/TjgaKtGj6HI/AAAAAAAAAM8/1JiXQ6em6cQ/s72-c/asset_tags.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-2903712089215723455</id><published>2011-08-01T22:42:00.012+02:00</published><updated>2011-08-05T22:52:39.635+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='segmentation'/><category scheme='http://www.blogger.com/atom/ns#' term='evaluation'/><category scheme='http://www.blogger.com/atom/ns#' term='xavi'/><title type='text'>Adaptation of the ETHZ Shape database to image partitions</title><content type='html'>For the last year I have been working with the &lt;a href="http://www.vision.ee.ethz.ch/datasets/index.en.html"&gt;ETHZ&lt;/a&gt; shape classes dataset to run my experiments. As &lt;a href="http://bitsearch.blogspot.com/2010/08/interactive-object-segmentation-with.html"&gt;reported&lt;/a&gt; one year ago, the objects from the five categories were annotated with bounding boxes, which were &lt;a href="http://bitsearch.blogspot.com/2010/08/rectangle-expansion-and-inclusion.html"&gt;mapped&lt;/a&gt; into the segmentations I use in my research. The problem of this approach is that, although simple, it introduces many noisy regions from the background that make it difficult to evaluate the next techniques by themselves. For this reason, it was decided to also exploit the pseudo-masks which are also provided in the dataset. And this has not been easy at all.&lt;br /&gt;&lt;br /&gt;I write pseudo-masks because in fact they are contours of the annotated objects, but not all of them are closed. In the region-based approach I follow, the basic work unit is the region which, by definition, is surrounded by a closed contour. So the first task I had to complete was to review all provided masks and manually add the pixels necessary to close them. 
The main problem was on the legs of the giraffes, but some other contours also had a very thin line hardly noticeable that needed to be closed.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://4.bp.blogspot.com/-vbdUiVQ_wdI/TjwKZaP8nlI/AAAAAAAAAts/u4gheMxZNKg/s1600/contour.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 182px;" src="http://4.bp.blogspot.com/-vbdUiVQ_wdI/TjwKZaP8nlI/AAAAAAAAAts/u4gheMxZNKg/s400/contour.png" alt="" id="BLOGGER_PHOTO_ID_5637392265220234834" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;The next problem was the variability in the file naming. Three object categories (applelogos, bottles and giraffes) use a suffix for the contour files, and the two other categories (mugs and swans) use a different extension. This diversity introduced some complexity when writing my scripts and Java software to manage the dataset. Moreover, in the first set every instance of an object was in a separate  mask file, while in the two last categories they were all in the same  image, so I had to manually split them and name the files consistently  with the first scheme.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://1.bp.blogspot.com/-SEO5jatCmM4/TjwKZchV3MI/AAAAAAAAAt0/pbJCHTLI79k/s1600/mask.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 83px;" src="http://1.bp.blogspot.com/-SEO5jatCmM4/TjwKZchV3MI/AAAAAAAAAt0/pbJCHTLI79k/s400/mask.png" alt="" id="BLOGGER_PHOTO_ID_5637392265830063298" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;In order to work with the dataset and evaluate the impact of the rough selections with the bounding boxes, I designed an architecture divided in two stages. In the first stage, the goal was to obtain a binary mask of the object. 
Given the available software, I decided to segment the closed contours provided in the ETHZ dataset and then apply the same &lt;a href="http://bitsearch.blogspot.com/2010/08/rectangle-expansion-and-inclusion.html"&gt;bounding box to regions&lt;/a&gt; mechanism I had previously applied on the original images. As a result, I would select a set of regions of the segmented contour image, which would define an object mask.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://2.bp.blogspot.com/-lwG_SkBaZ0A/TjxX_ZliWaI/AAAAAAAAAuc/X9IEaPqsTKw/s1600/GroundTruthMask.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 172px;" src="http://2.bp.blogspot.com/-lwG_SkBaZ0A/TjxX_ZliWaI/AAAAAAAAAuc/X9IEaPqsTKw/s400/GroundTruthMask.png" alt="" id="BLOGGER_PHOTO_ID_5637477580272720290" border="0" /&gt;&lt;/a&gt;This stage also created some problems, as I discovered that the provided bounding boxes did not match the positions of the provided contours. Especially again in the giraffes category, many of the rectangles did not include completely the contour, so my mapping algorithm would miss them. The solution was to manually determine again the bounding boxes and modify the provided ground truth.&lt;br /&gt;&lt;br /&gt;Once the mask for the object was generated, I &lt;a href="http://bitsearch.blogspot.com/2010/08/mapping-masks-on-bpt-leaves.html"&gt;mapped the mask&lt;/a&gt; on the image segments that had been automatically generated. I used the same mechanism I had developed last Summer for the TRECVID instance search campaign,  with a new modification. The measure that determine if a partially overlapped region was to be considered was based now on the &lt;a href="http://bitsearch.blogspot.com/2010/08/methodology-for-evaluating-interactive.html"&gt;Jaccard index&lt;/a&gt; instead of the precision. 
In particular, I forced a minimum Jaccard index of 0.7 to select the region.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://2.bp.blogspot.com/-5F1BgzFtUO4/TjwMuWF6s2I/AAAAAAAAAuM/vdsWCJA4lrc/s1600/BptMask.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 172px;" src="http://2.bp.blogspot.com/-5F1BgzFtUO4/TjwMuWF6s2I/AAAAAAAAAuM/vdsWCJA4lrc/s400/BptMask.png" alt="" id="BLOGGER_PHOTO_ID_5637394823904932706" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;In the end, the mapping of the ground truth masks on the image segmentations defined a set of regions that could be compared with the ground truth to assess their quality. As I was preparing a dataset to be used in further retrieval and classification tasks, I am not interested in working with a dataset that does not represent the objects that are supposed to be represented. For this reason, I decided to work only with those object instances whose representation on the image partitions is equal to or better than a minimum value of the Jaccard Index, which was also taken as 0.7.&lt;br /&gt;&lt;br /&gt;Finally, I completed my study by evaluating the impact of using accurate masks or rough bounding boxes to select regions on the considered image segmentations. For this reason I compared the Jaccard Index and the bounding box occupation of the two types of BPT masks (the one generated with the ground truth mask and the one generated with the ground truth bounding box). 
The results shown below clearly state that those objects with a larger occupation in the bounding box (apples, bottles, mugs) offer a better accuracy than those more complex objects, such as the giraffes and the swans.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://4.bp.blogspot.com/-KPUAH9NOJvI/TjwNwV9s3VI/AAAAAAAAAuU/VFKWWix_Mb0/s1600/Graphs.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 159px;" src="http://4.bp.blogspot.com/-KPUAH9NOJvI/TjwNwV9s3VI/AAAAAAAAAuU/VFKWWix_Mb0/s400/Graphs.png" alt="" id="BLOGGER_PHOTO_ID_5637395957741837650" border="0" /&gt;&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-2903712089215723455?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/2903712089215723455/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/08/adaptation-of-ethz-shape-database-to.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/2903712089215723455'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/2903712089215723455'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/08/adaptation-of-ethz-shape-database-to.html' title='Adaptation of the ETHZ Shape database to image partitions'/><author><name>Xavi Giró-i-Nieto</name><uri>https://profiles.google.com/110271653716466055491</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh5.googleusercontent.com/--EaBZ1Bzzsg/AAAAAAAAAAI/AAAAAAAAAwU/4J6xIhZ3Xyo/s512-c/photo.jpg'/></author><media:thumbnail 
xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/-vbdUiVQ_wdI/TjwKZaP8nlI/AAAAAAAAAts/u4gheMxZNKg/s72-c/contour.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-5964428978747518099</id><published>2011-07-25T14:59:00.010+02:00</published><updated>2011-08-06T14:46:43.484+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Manel'/><category scheme='http://www.blogger.com/atom/ns#' term='query by image'/><category scheme='http://www.blogger.com/atom/ns#' term='GOS'/><title type='text'>Integration of a visual indexing engine for local and remote searches</title><content type='html'>&lt;div style="text-align: center;"&gt;&lt;span class="Apple-style-span" &gt;&lt;u&gt;&lt;br /&gt;&lt;/u&gt;&lt;/span&gt;&lt;/div&gt;&lt;a href="http://4.bp.blogspot.com/-IzKeMI-zhgU/Ti1qpZxbBDI/AAAAAAAAAKc/H4043TQ45NU/s1600/hctqueryengine.png" onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}"&gt;&lt;/a&gt;&lt;div style="text-align: justify;"&gt;One of the problems that appeared when using &lt;a href="http://upseek.upc.edu/gos/"&gt;GOS (Graphic Object Searcher)&lt;/a&gt;, a user interface developed by the UPSeek group for the retrieval of similar images, was the slow searches. The possibilities on visual search offered by our engine were overshadowed due to the long response time after the user runs the query in a moderate/large database. The reason of this delay is the lack of any indexing structure for the stored data, a scenario that forces the search engine to explore the whole database in every query.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;We have seen that GOS can perform image-based queries with a search engine developed for 32-bit Linux systems (deprecated within the research group). 
Then we saw its possibilities as expanded services including &lt;a href="http://bitsearch.blogspot.com/2009/12/looking-for-similar-images-in-fedora.html"&gt;web based queries on UPSeek server&lt;/a&gt;. GOS became an UI that could be used in all operating systems and, therefore, opened to many other types of user.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;GOS finally included the possibility of an interactive segmentation of images for region selection and &lt;a href="http://bitsearch.blogspot.com/2010/11/visual-search-based-on-query-by-binary.html"&gt;region-based queries&lt;/a&gt;. Again, the response time of the search engine was an important step to resolve to provide good service that, on the other hand,&lt;a href="http://bitsearch.blogspot.com/search/label/xavi"&gt;improves every day&lt;/a&gt;.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;The time has arrived for a very important step to be developed an efficient indexing system for image descriptors which support the query. This system is based on &lt;a href="http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=4032612"&gt;Hierarchical Cellular Tree (HCT)&lt;/a&gt;, a process that creates an indexed tree in memory with a database file input.&lt;/div&gt;&lt;div style="text-align: center;"&gt;&lt;span class="Apple-style-span" style="color: rgb(0, 0, 238); "&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;The system consists of different tools that communicate each other using a simple socket language for exchanging data called KSC. 
KSCenter acts as an intermediary to receive all requests from the various tools and send them to whoever need them.&lt;/div&gt;&lt;div style="text-align: center;"&gt;&lt;span class="Apple-style-span" style="color: rgb(0, 0, 238); "&gt;&lt;span class="Apple-style-span" style="color: rgb(0, 0, 0); "&gt;&lt;a href="http://4.bp.blogspot.com/-CRMJlNpmK78/Tj02WSpHx2I/AAAAAAAAAK0/2hZXOJWkKPw/s1600/hctqueryengine.png" onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}"&gt;&lt;img src="http://4.bp.blogspot.com/-CRMJlNpmK78/Tj02WSpHx2I/AAAAAAAAAK0/2hZXOJWkKPw/s400/hctqueryengine.png" border="0" alt="" id="BLOGGER_PHOTO_ID_5637722065127065442" style="display: block; margin-top: 0px; margin-right: auto; margin-bottom: 10px; margin-left: auto; text-align: center; cursor: pointer; width: 400px; height: 222px; " /&gt;&lt;/a&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="color: rgb(0, 0, 238); "&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;These HCT tools have been developed for indexing the Image Group visual descriptors' for Carles Ventura's master thesis:&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;ul&gt;&lt;li&gt;&lt;b&gt;hct_building&lt;/b&gt;: executes the hct tree stored in memory by using a database text file or previous trees stored on hard disk using database_indexing.&lt;/li&gt;&lt;li&gt;&lt;b&gt;database_indexing&lt;/b&gt;: run an hct tree to be stored on in a text file on the hard disk.&lt;/li&gt;&lt;li&gt;&lt;b&gt;query_request&lt;/b&gt;: run queries to the memory hct tree built by hct_building.&lt;/li&gt;&lt;/ul&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;This new system is to be used from GOS in a transparent manner for the user to offer efficient queries both in results and runtime.&lt;/div&gt;&lt;a 
href="http://3.bp.blogspot.com/-DswQgKZQ51E/Tj02hMBLqWI/AAAAAAAAAK8/zqYc6vOQmWM/s1600/GOShctquery.png" onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}"&gt;&lt;img src="http://3.bp.blogspot.com/-DswQgKZQ51E/Tj02hMBLqWI/AAAAAAAAAK8/zqYc6vOQmWM/s400/GOShctquery.png" border="0" alt="" id="BLOGGER_PHOTO_ID_5637722252327496034" style="display: block; margin-top: 0px; margin-right: auto; margin-bottom: 10px; margin-left: auto; text-align: center; cursor: pointer; width: 400px; height: 253px; " /&gt;&lt;/a&gt;&lt;div&gt;&lt;div style="text-align: center;"&gt;&lt;span class="Apple-style-span"&gt;&lt;u&gt;&lt;br /&gt;&lt;/u&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;Moreover, this is the system that adopts the UPSeek server to provide the service remotely. GOS, in remote mode and selecting the port that is provided by the development team for each user, makes a webservice request to manage the SimilarImages execution of the query_request tool and return the results to the client.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;a href="http://1.bp.blogspot.com/-m5EBQ6LMRDw/Tj03g9LSiTI/AAAAAAAAALM/Lmk4oSmwde8/s1600/GOSremotehctquery.png" onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}"&gt;&lt;img src="http://1.bp.blogspot.com/-m5EBQ6LMRDw/Tj03g9LSiTI/AAAAAAAAALM/Lmk4oSmwde8/s400/GOSremotehctquery.png" border="0" alt="" id="BLOGGER_PHOTO_ID_5637723347854985522" style="display: block; margin-top: 0px; margin-right: auto; margin-bottom: 10px; margin-left: auto; text-align: center; cursor: pointer; width: 400px; height: 298px; " /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;With these new tools we can search on large databases (of order 200K nodes) within a very small time (seconds) but, what if the user prefers to have multiple search indexes? 
Separate all your images on different topics, situations, places… This freedom will be hosted by GOS and a new tool, the database_indexing, which generates trees and save them to disk for a faster creation and reading of the tree. But this is still a mode of execution to be developed and published in the future.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;GOS queries become versatile and powerful over a simple GUI used for all kind of users.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-5964428978747518099?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/5964428978747518099/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/07/gos-using-several-search-engines.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/5964428978747518099'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/5964428978747518099'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/07/gos-using-several-search-engines.html' title='Integration of a visual indexing engine for local and remote searches'/><author><name>Manel</name><uri>http://www.blogger.com/profile/13861846623917028915</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' 
url='http://4.bp.blogspot.com/-CRMJlNpmK78/Tj02WSpHx2I/AAAAAAAAAK0/2hZXOJWkKPw/s72-c/hctqueryengine.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-6056605331271532959</id><published>2011-07-06T21:25:00.013+02:00</published><updated>2011-07-07T10:13:36.063+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Monica'/><category scheme='http://www.blogger.com/atom/ns#' term='text'/><title type='text'>Step by step of a tag suggestion from an image</title><content type='html'>In order to get ideas for start to work in the algorithm of tag recommendation, I will summarize the description of the algorithm found in the paper &lt;a href="http://www.springerlink.com/content/4382q507494k5g75/"&gt;Scalable search-based image annotation&lt;/a&gt; by Changu Wang, Feng Jing, Lei Zhang and Hong-Jiang Zhang.&lt;br /&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;They focus on annotation of large personal image collections. Unlike web images that have rich metadata such filename, URL and surrounding text for indexing and searching, personal images have little textual information. They propose a scalable search-based image annotation (SBIA) algorithm which is analogous to information retrieval.&lt;br /&gt;&lt;br /&gt;The algorithm has four steps:&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold; color: rgb(51, 204, 255);"&gt;Step 1. Content-based image retrieval&lt;/span&gt;&lt;br /&gt;For a target image, a typical CBIR technique is used to retrieve a set of visually similar images denoted by S. For each retrieved image, the distance between image i and the target image is denoted as Dist(i).&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold; color: rgb(51, 204, 255);"&gt;Step 2. 
Text-based keyword&lt;/span&gt;&lt;span style="font-weight: bold; color: rgb(51, 204, 255);"&gt; search&lt;/span&gt;&lt;br /&gt;For each retrieved image &lt;span style="font-style: italic;"&gt;i &lt;/span&gt;a text-based keyword search process is used to rank all the related keywords. The related keywords are the ones that appear in title or description except the stop words.&lt;br /&gt;&lt;br /&gt;For each retrieved image&lt;span style="font-style: italic;"&gt; i&lt;/span&gt;, the set of related keywords are denoted as &lt;span style="font-style: italic;"&gt;Ki &lt;/span&gt;and the combination of all &lt;span style="font-style: italic;"&gt;Ki&lt;/span&gt; for all images in &lt;span style="font-style: italic;"&gt;S &lt;/span&gt;is denoted as &lt;span style="font-style: italic;"&gt;K&lt;/span&gt;. For each keyword &lt;span style="font-style: italic;"&gt;Kj&lt;/span&gt; in &lt;span style="font-style: italic;"&gt;K&lt;/span&gt;, its relevance score to retrieved image &lt;span style="font-style: italic;"&gt;i&lt;/span&gt; could be calculated with two strategies:&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;&lt;br /&gt;1.  
Prominence score&lt;/span&gt;: it reflects the prominence of keyword to annotate an image.&lt;br /&gt;&lt;span style="font-weight: normal;"&gt;&lt;span style="font-weight: bold;"&gt;&lt;span style="font-weight: normal;"&gt;&lt;span style="font-weight: bold;"&gt;&lt;span style="font-weight: normal;"&gt;&lt;span style="font-weight: bold;"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-a-PZgUc0mW4/ThVd0hiOsbI/AAAAAAAAAME/Hg6Uwov1DwI/s1600/sc_prom.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 400px; height: 73px;" src="http://4.bp.blogspot.com/-a-PZgUc0mW4/ThVd0hiOsbI/AAAAAAAAAME/Hg6Uwov1DwI/s400/sc_prom.png" alt="" id="BLOGGER_PHOTO_ID_5626506466405822898" border="0" /&gt;&lt;/a&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span style="font-weight: normal;"&gt;where occurrence(i, j) denotes the number of &lt;span style="font-style: italic;"&gt;K&lt;/span&gt;&lt;/span&gt;&lt;span style="font-weight: normal;"&gt;&lt;span style="font-style: italic;"&gt;j &lt;/span&gt;in title or description of retrieved image.&lt;/span&gt;&lt;br /&gt;&lt;span style="font-weight: normal;"&gt;&lt;span style="font-weight: bold;"&gt;&lt;br /&gt;2. IF-IKF:&lt;/span&gt; It is based on the &lt;a href="http://bitsearch.blogspot.com/2011/01/ranking-documents-based-on-tf-idf.html"&gt;TF-IDF weighted&lt;/a&gt; scheme that is used in information retrieval. In this paper the images correspond to the textual terms, while the annotation keywords correspond to the documents in information&lt;/span&gt;&lt;span style="font-weight: normal;"&gt; re&lt;/span&gt;&lt;span style="font-weight: normal;"&gt;trieval. 
Therefore, a keyword could be considered as a document with several related images being terms.&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;So &lt;span style="font-weight: bold;"&gt;image frequency&lt;/span&gt;&lt;span style="font-style: italic;"&gt; (IF)&lt;/span&gt; is defined as the number of retrieved images in &lt;span style="font-style: italic;"&gt;S &lt;/span&gt;that contain  &lt;span style="font-style: italic;"&gt;Kj&lt;/span&gt;. And the &lt;span style="font-weight: bold;"&gt;keyword frequency&lt;/span&gt; &lt;span style="font-style: italic;"&gt;(KF)&lt;/span&gt; of an image could be defined as the number of related keywords of the image.&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-oZr_TM2G93o/ThVf80hEXyI/AAAAAAAAAMM/GBTv2EhzH3U/s1600/sc_ifikf.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 400px; height: 60px;" src="http://3.bp.blogspot.com/-oZr_TM2G93o/ThVf80hEXyI/AAAAAAAAAMM/GBTv2EhzH3U/s400/sc_ifikf.png" alt="" id="BLOGGER_PHOTO_ID_5626508807963434786" border="0" /&gt;&lt;/a&gt;As a result, each retrieved image has a textual descriptor. This textual descriptor is represented by a list of keyword-score pairs.&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold; color: rgb(51, 204, 255);"&gt;Step 3. Fusion algorithm&lt;/span&gt;&lt;br /&gt;The ranked lists of all similar images in&lt;span style="font-style: italic;"&gt; S &lt;/span&gt;are combined to get the final candidate keywords of the target image. 
Considering that more similar image should have more impact on the final annotation results, they use the following expression to score and rank the keywords &lt;span style="font-style: italic;"&gt;Kj&lt;/span&gt;:&lt;br /&gt;&lt;span style="font-weight: normal;"&gt;&lt;span style="font-weight: bold;"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-7Dr_ua7ZLZE/ThVhIGlw9MI/AAAAAAAAAMU/hf7M_9ip19E/s1600/sc_relevan.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 400px; height: 60px;" src="http://4.bp.blogspot.com/-7Dr_ua7ZLZE/ThVhIGlw9MI/AAAAAAAAAMU/hf7M_9ip19E/s400/sc_relevan.png" alt="" id="BLOGGER_PHOTO_ID_5626510101305160898" border="0" /&gt;&lt;/a&gt;&lt;/span&gt;&lt;/span&gt;where f(*) is a function that transforms distance to similarity.&lt;br /&gt;&lt;br /&gt;Once the relevance score of each keyword in K is obtained, they can pick up the final top N candidate annotations.&lt;br /&gt;&lt;br /&gt;&lt;span style="color: rgb(51, 204, 255); font-weight: bold;"&gt;Step 4. Re-ranking annotations using Random Walk&lt;/span&gt;&lt;br /&gt;Finally, the candidate annotations are re-ranked using &lt;a href="http://bitsearch.blogspot.com/2010/07/random-walk-step-by-step.html"&gt;Random Walk.&lt;/a&gt; The basic idea is that highly correlated keywords should be ranked higher. Consider a graph G whose nodes represent a candidate annotation &lt;span style="font-style: italic;"&gt;wi&lt;/span&gt; and its edges are weighted. 
This weight is defined based on the "co-occurrence" similarity.&lt;br /&gt;&lt;br /&gt;They &lt;span class="hps" title="Haz clic para obtener otras posibles traducciones"&gt;used an&lt;/span&gt; &lt;span class="hps" title="Haz clic para obtener otras posibles traducciones"&gt;image search engine&lt;/span&gt; &lt;span class="hps" title="Haz clic para obtener otras posibles traducciones"&gt;called&lt;/span&gt; &lt;span class="hps" title="Haz clic para obtener otras posibles traducciones"&gt;EnjoyPhoto&lt;/span&gt; &lt;span class="hps" title="Haz clic para obtener otras posibles traducciones"&gt;to calculate&lt;/span&gt; &lt;span class="hps" title="Haz clic para obtener otras posibles traducciones"&gt;this weight&lt;/span&gt;&lt;span title="Haz clic para obtener otras posibles traducciones"&gt;.&lt;/span&gt; Each keyword &lt;span style="font-style: italic;"&gt;wi&lt;/span&gt; will be used as a query to EnjoyPhoto. The number of search results is denoted as num(i ). For two different words &lt;span style="font-style: italic;"&gt;wi&lt;/span&gt; and &lt;span style="font-style: italic;"&gt;wj&lt;/span&gt;,  “&lt;span style="font-style: italic;"&gt;w&lt;/span&gt;&lt;span style="font-style: italic;"&gt;iwj&lt;/span&gt;” will be used as the query. The number of search results is denoted as num(i, j ). 
The weight of the edge between &lt;span style="font-style: italic;"&gt;wi&lt;/span&gt; and &lt;span style="font-style: italic;"&gt;wj&lt;/span&gt; is&lt;br /&gt;&lt;div style="text-align: justify;"&gt;then calculated as:&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/-1-GsxX9-nKM/ThVi01wnSkI/AAAAAAAAAMc/f08HJcZR8O8/s1600/sim_weight.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 400px; height: 74px;" src="http://2.bp.blogspot.com/-1-GsxX9-nKM/ThVi01wnSkI/AAAAAAAAAMc/f08HJcZR8O8/s400/sim_weight.png" alt="" id="BLOGGER_PHOTO_ID_5626511969393003074" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;Once the graph G is constructed, they compute the Random Walk algorithm, as we did in the &lt;a href="http://bitsearch.blogspot.com/2010/07/scheme-of-reranking-results-for-video.html"&gt;Reranking algorithm&lt;/a&gt;, to suggest the best candidate annotations for the target image.&lt;br /&gt;&lt;/div&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-6056605331271532959?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/6056605331271532959/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/07/step-by-step-of-tag-suggestion-from.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/6056605331271532959'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/6056605331271532959'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/07/step-by-step-of-tag-suggestion-from.html' title='Step by step of a tag suggestion 
from an image'/><author><name>Monica</name><uri>http://www.blogger.com/profile/04558100039934227329</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/-a-PZgUc0mW4/ThVd0hiOsbI/AAAAAAAAAME/Hg6Uwov1DwI/s72-c/sc_prom.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-1632893289991535634</id><published>2011-07-06T17:49:00.049+02:00</published><updated>2011-07-11T19:13:43.944+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='fusion'/><category scheme='http://www.blogger.com/atom/ns#' term='xavi'/><category scheme='http://www.blogger.com/atom/ns#' term='descriptors'/><title type='text'>Normalization of visual distances for fusion in retrieval</title><content type='html'>In my &lt;a href="http://bitsearch.blogspot.com/2011/07/why-mahalanobis-distance-was-discarded.html"&gt;previous post&lt;/a&gt; I presented why normalizing feature vectors was discarded when working with three types of visual descriptors defined by the MPEG-7 standard. The proposed direction was to focus the normalizing efforts on the similarity metrics (distances) that are proposed for each of these descriptors.&lt;br /&gt;&lt;br /&gt;During my first tests with the distances, I noticed that the dynamic range for each of them was completely different. While all of them would be zero-valued when comparing two identical descriptors, the maximum possible figure was variable depending on the type of descriptor. So I decided to compute all mutual distances between the regions of my dataset and determine, for each type of descriptor, the mean value. 
The mean was chosen over the maximum value because in visually empirical tests, pairs of regions completely dissimilar from the descriptor point of view were already measured with distance values below the average. So it was considered that the subjective point of "different" applied to all comparisons with distance equal to the mean and above. So after 317,430,375 comparisons, the three mean distances learned, one for every descriptor, were: 1.155 for Dominant Colors, 0.302 for Contour Shape and 9.349 for Edge Histogram.&lt;br /&gt;&lt;br /&gt;These mean distances were treated as the maximum distances and used to linearly normalize the raw distances so that they would be mapped into 1.0. I thought this might have been enough but, when I queried the database with a &lt;a href="http://bitsearch.blogspot.com/2011/06/comparing-mpeg-7-color-texture-and.html"&gt;multicolor-striped Apple logo&lt;/a&gt;, I obtained the results I previously &lt;a href="http://bitsearch.blogspot.com/2011/06/comparing-mpeg-7-color-texture-and.html"&gt;reported&lt;/a&gt; for &lt;a href="http://1.bp.blogspot.com/-cj5wVHC7neM/ThdxYQgme2I/AAAAAAAAAqE/eRI41xhvfLU/s1600/noCorrection-color.png"&gt;color&lt;/a&gt;, &lt;a href="http://2.bp.blogspot.com/-a5AYATmSwuk/ThdxZI4IPhI/AAAAAAAAAqU/JITcc489eUU/s1600/noCorrection-texture.png"&gt;texture&lt;/a&gt; and &lt;a href="http://1.bp.blogspot.com/-lV5ijyHlMJo/ThdxYg_FK0I/AAAAAAAAAqM/5tuLrfcUDvI/s1600/noCorrection-shape.png"&gt;shape&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;&lt;p style="margin: 0px; text-indent: 0px;"&gt;The main problem of these results is in the obtained score values (inverse of the distance). If a user were shown these figures, the first thing to be noticed is that the values are "very high" as they are supposed to quantify the degree of subjective similarity with respect to the query. In addition, the second criticism these results would obtain is that the values obtained for one descriptor are not comparable with the ones obtained by others. 
This second problem prevents any early fusion of the distances because values are not comparable from a perceptual point of view. It is necessary then to apply another strategy for normalization.&lt;br /&gt;&lt;/p&gt;&lt;p style="margin: 0px; text-indent: 0px;"&gt;&lt;br /&gt;&lt;/p&gt;The chosen approach is based on the figure below, where an &lt;span style="font-style: italic;"&gt;alpha&lt;/span&gt; value estimated for each visual descriptor is mapped into a &lt;span style="font-style: italic;"&gt;beta&lt;/span&gt; value whose subjective interpretation in terms of similarity is harmonized. In addition, the zero (identical) and mean (different) points are also forced for coherence with the original distance metric. The mean point (mu) of the distance was interpreted as completely different because empirical tests showed that the notion of "completely different" was completely satisfied at that point (and probably earlier). So all distance values equal to the mean and above are mapped to a corrected 1.0.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://3.bp.blogspot.com/-w3HUkeT9FXI/ThdR43dOgXI/AAAAAAAAApc/14fg8Ymdbw4/s1600/harmonizing.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 290px;" src="http://3.bp.blogspot.com/-w3HUkeT9FXI/ThdR43dOgXI/AAAAAAAAApc/14fg8Ymdbw4/s400/harmonizing.png" alt="" id="BLOGGER_PHOTO_ID_5627056296823783794" border="0" /&gt;&lt;/a&gt;&lt;a href="http://3.bp.blogspot.com/-qChUMMd6IxU/ThYfvF6GOHI/AAAAAAAAAoU/aDfwQwJbBY0/s1600/Harmonizing.png"&gt;&lt;br /&gt;&lt;/a&gt;The determination of the (alpha,beta) pair is the key to solving the whole problem. The adopted solution is based on the assumption that a user expects to find "similar" matches over 0.5 and "not so similar" results under this figure. So the adopted beta was 0.5.&lt;br /&gt;&lt;br /&gt;The problem then turns into how to find the threshold between "quite" and "not so" similar. 
The proposed solution assumes that, given a ranked list, this point is located at the top hit that defines the sub-ranked list that maximizes the &lt;a href="http://en.wikipedia.org/wiki/F1_score"&gt;F-0.5 score&lt;/a&gt;. This score is supposed to define the first K elements in a ranked list that offer a good trade-off between precision and recall, prioritizing precision. With this criterion adopted, I ran again &lt;a href="http://bitsearch.blogspot.com/2011/06/comparing-mpeg-7-color-texture-and.html"&gt;retrieval experiments on the ETHZ dataset&lt;/a&gt; and measured the distance associated to the Kth element in the obtained ranked list.&lt;br /&gt;&lt;br /&gt;Not all ranked lists were used because in some cases the descriptor was not good for the query, so considering them would introduce noise to the measures. This is shown in the graph below, where in some categories some descriptors perform worse in terms of MAP than even the random case, which is adopted as a baseline. The decision on what ranked lists to use was based on the ranked list's MAP, and only those ranked lists whose MAP was higher than the random MAP plus 3 times its standard deviation were considered. This way, it was expected that the considered descriptor was useful in the query that generated the ranked list. 
Moreover, each threshold was also weighted by the ratio between the ranked list's MAP and the random MAP obtained in the class to which the query belongs.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://2.bp.blogspot.com/-APwNEgQfCLA/Thcqt83psoI/AAAAAAAAApM/h6c5lpYtGss/s1600/MAPperClass.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 321px;" src="http://2.bp.blogspot.com/-APwNEgQfCLA/Thcqt83psoI/AAAAAAAAApM/h6c5lpYtGss/s400/MAPperClass.png" alt="" id="BLOGGER_PHOTO_ID_5627013228344750722" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;Once the &lt;span style="font-style: italic;"&gt;(alpha,beta)&lt;/span&gt; point is defined, multiple options exist for the design of the correction function. One option would be to define two linear segments joining the three points in two stages; another one would be to define an exponential curve that would include the requested (alpha,beta) point. If more (alpha,beta) points were defined, multiple segments could also be estimated to obtain a more accurate function.&lt;br /&gt;&lt;br /&gt;The exponential function was chosen for the simplicity of its implementation, so one of the following expressions was used depending on the relation between alpha and beta.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://1.bp.blogspot.com/-vWOCz4vlIl0/ThiJqGPa0iI/AAAAAAAAAqk/mc6cELOR0Vs/s1600/correction.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 264px;" src="http://1.bp.blogspot.com/-vWOCz4vlIl0/ThiJqGPa0iI/AAAAAAAAAqk/mc6cELOR0Vs/s400/correction.png" alt="" id="BLOGGER_PHOTO_ID_5627399090722230818" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;After experimentation, an alpha value was learned for every visual descriptor. 
In all cases low values of alpha were estimated (0.7-0.8), confirming the previous intuition that the concept of similarity is diluted long before the mean value of all distances is reached.&lt;br /&gt;&lt;br /&gt;Finally, the scores obtained for the Apple logo query are now closer to a harmonized interpretation across descriptors thanks to the introduced correction.&lt;br /&gt;&lt;br /&gt;&lt;span style="font-style: italic;"&gt;MPEG-7 Dominant Colors&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="http://4.bp.blogspot.com/-0z7V-tXslOQ/ThssJ0fDUWI/AAAAAAAAAqs/n1oeVbyUkTo/s1600/correction-color.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 171px;" src="http://4.bp.blogspot.com/-0z7V-tXslOQ/ThssJ0fDUWI/AAAAAAAAAqs/n1oeVbyUkTo/s400/correction-color.png" alt="" id="BLOGGER_PHOTO_ID_5628140706548896098" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;span style="font-style: italic;"&gt;MPEG-7 Contour Shape&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="http://4.bp.blogspot.com/-X1VAsUcFbC8/Thssc6cX-YI/AAAAAAAAAq0/G9QEvO9M3pQ/s1600/correction-shape.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 171px;" src="http://4.bp.blogspot.com/-X1VAsUcFbC8/Thssc6cX-YI/AAAAAAAAAq0/G9QEvO9M3pQ/s400/correction-shape.png" alt="" id="BLOGGER_PHOTO_ID_5628141034565794178" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;span style="font-style: italic;"&gt;MPEG-7 Edge Histogram (texture)&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="http://2.bp.blogspot.com/-V72uHGeCqXA/ThssdGWwtQI/AAAAAAAAAq8/rIQUi5c7Fdw/s1600/correction-texture.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 171px;" src="http://2.bp.blogspot.com/-V72uHGeCqXA/ThssdGWwtQI/AAAAAAAAAq8/rIQUi5c7Fdw/s400/correction-texture.png" alt="" id="BLOGGER_PHOTO_ID_5628141037763474690" border="0" 
/&gt;&lt;/a&gt;&lt;br /&gt;Acknowledgments: Thanks to  my colleagues &lt;a href="http://www.ee.columbia.edu/%7Eyanwang/"&gt;Yan Wang&lt;/a&gt; and &lt;a href="http://www.ee.columbia.edu/%7Erj2349/"&gt;Rongrong Ji&lt;/a&gt; for sharing their views and ideas on this topic.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-1632893289991535634?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/1632893289991535634/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/07/normalization-of-visual-distances-for.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/1632893289991535634'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/1632893289991535634'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/07/normalization-of-visual-distances-for.html' title='Normalization of visual distances for fusion in retrieval'/><author><name>Xavi Giró-i-Nieto</name><uri>https://profiles.google.com/110271653716466055491</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh5.googleusercontent.com/--EaBZ1Bzzsg/AAAAAAAAAAI/AAAAAAAAAwU/4J6xIhZ3Xyo/s512-c/photo.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/-w3HUkeT9FXI/ThdR43dOgXI/AAAAAAAAApc/14fg8Ymdbw4/s72-c/harmonizing.png' height='72' 
width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-7290330016651442414</id><published>2011-07-06T16:20:00.015+02:00</published><updated>2011-07-06T22:01:27.133+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='evaluation'/><category scheme='http://www.blogger.com/atom/ns#' term='fusion'/><category scheme='http://www.blogger.com/atom/ns#' term='xavi'/><category scheme='http://www.blogger.com/atom/ns#' term='descriptors'/><title type='text'>Discarding the normalization of feature vectors from MPEG-7 visual descriptors</title><content type='html'>Last week I &lt;a href="http://bitsearch.blogspot.com/2011/06/comparing-mpeg-7-color-texture-and.html"&gt;reported&lt;/a&gt; on the evaluation of three visual descriptors for image retrieval through region matching. These features quantize three independent and complementary visual cues: color, shape and texture. The experiments proved that each of them separately is valuable to describe the five object categories that were tested. The next question to be answered now is, as largely reported in the literature, whether their combination increases the overall performance of the retrieval system.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://3.bp.blogspot.com/-D4givbcIcDk/ThR8pRJ7q6I/AAAAAAAAAnE/XK_N9mvgmeg/s1600/fusion.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 132px;" src="http://3.bp.blogspot.com/-D4givbcIcDk/ThR8pRJ7q6I/AAAAAAAAAnE/XK_N9mvgmeg/s400/fusion.png" alt="" id="BLOGGER_PHOTO_ID_5626258882914134946" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;A couple of years ago I explored the possibility of using the &lt;a href="http://en.wikipedia.org/wiki/Mahalanobis_distance"&gt;Mahalanobis distance&lt;/a&gt;, a metric that takes into account the distribution of the two samples to be compared in the feature space. 
Its computation basically requires the prior estimation of the covariance of the multidimensional &lt;a href="http://en.wikipedia.org/wiki/Normal_distribution"&gt;Gaussian distribution&lt;/a&gt; of the feature vectors that represent every observation. In those cases where feature dimensions are not correlated (&lt;a href="http://en.wikipedia.org/wiki/Independent_variable#Use_in_mathematics"&gt;independent&lt;/a&gt;), the expression is simplified and only depends on the standard deviation in every dimension. Moreover, in those cases where all dimensions present the same unit standard deviation (white distribution), the calculation further reduces to the &lt;a href="http://en.wikipedia.org/wiki/Euclidean_distance"&gt;Euclidean distance&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://1.bp.blogspot.com/-YCmR5G3WgqY/ThR8phtQydI/AAAAAAAAAnM/LJenbTfkXhs/s1600/euclidean.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 186px;" src="http://1.bp.blogspot.com/-YCmR5G3WgqY/ThR8phtQydI/AAAAAAAAAnM/LJenbTfkXhs/s400/euclidean.png" alt="" id="BLOGGER_PHOTO_ID_5626258887357286866" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Assuming the independence of each feature dimension, the simple Euclidean distance can be used by applying a linear transformation &lt;span style="font-style: italic;"&gt;A&lt;/span&gt; to every input feature vector &lt;span style="font-style: italic;"&gt;x&lt;/span&gt; and generating a whitened distribution of feature vectors &lt;span style="font-style: italic;"&gt;y&lt;/span&gt;. 
Such a transformation can be obtained by applying the &lt;a href="http://en.wikipedia.org/wiki/Singular_value_decomposition"&gt;Singular Value Decomposition&lt;/a&gt; on the covariance matrix.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://3.bp.blogspot.com/-2iAveptZl9I/ThR8qKPgbQI/AAAAAAAAAnU/gqy2XOrH_7s/s1600/whitening.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 97px;" src="http://3.bp.blogspot.com/-2iAveptZl9I/ThR8qKPgbQI/AAAAAAAAAnU/gqy2XOrH_7s/s400/whitening.png" alt="" id="BLOGGER_PHOTO_ID_5626258898238336258" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;The final system would require the estimation of the mean and covariance of the feature vectors during a training stage, which would determine the whitening transformation Aw. During retrieval, every input sample x and y would be first whitened before measuring their distance with the &lt;a href="http://en.wikipedia.org/wiki/Euclidean_distance"&gt;Euclidean distance&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://1.bp.blogspot.com/-3HfYtdngaO8/ThR8qSslEDI/AAAAAAAAAnc/7Yh8Pjkx3So/s1600/architecture.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 210px;" src="http://1.bp.blogspot.com/-3HfYtdngaO8/ThR8qSslEDI/AAAAAAAAAnc/7Yh8Pjkx3So/s400/architecture.png" alt="" id="BLOGGER_PHOTO_ID_5626258900507758642" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;The application of these principles to my retrieval problem would allow a fast computation of the same Euclidean distance for every type of considered visual descriptor (shape, color and texture). However, this approach presents an unsolvable problem in my framework. The MPEG-7 visual descriptors I work with are normally not simply feature vectors whose elements have the same interpretation. 
For example, in the case of the &lt;a href="http://mpeg.chiariglione.org/standards/mpeg-7/mpeg-7.htm#E12E31"&gt;MPEG-7 Contour Shape&lt;/a&gt;, the descriptor contains the curvature and eccentricity of the region, as well as the normalized positions and strength of the inflection points. In the &lt;a href="http://mpeg.chiariglione.org/standards/mpeg-7/mpeg-7.htm#E12E22"&gt;MPEG-7 Dominant Color&lt;/a&gt; descriptor, the length of the vector may change in every observation, so for every region a variable number of one to eight dominant colors might be determined. In fact, the standard suggests a different distance for every visual descriptor, a metric that exploits the different types of information contained in the descriptors.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://2.bp.blogspot.com/-H0d3_HDH_Us/ThR9pF-TfzI/AAAAAAAAAnk/oh1AdF-gAco/s1600/Mpeg7ShapeCss.gif"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 231px;" src="http://2.bp.blogspot.com/-H0d3_HDH_Us/ThR9pF-TfzI/AAAAAAAAAnk/oh1AdF-gAco/s400/Mpeg7ShapeCss.gif" alt="" id="BLOGGER_PHOTO_ID_5626259979424202546" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;For this reason, I concluded that the classical normalization techniques on the feature vectors would not apply in my context. 
My normalization efforts should be applied on a later stage, on the distances themselves.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-7290330016651442414?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/7290330016651442414/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/07/why-mahalanobis-distance-was-discarded.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/7290330016651442414'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/7290330016651442414'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/07/why-mahalanobis-distance-was-discarded.html' title='Discarding the normalization of feature vectors from MPEG-7 visual descriptors'/><author><name>Xavi Giró-i-Nieto</name><uri>https://profiles.google.com/110271653716466055491</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh5.googleusercontent.com/--EaBZ1Bzzsg/AAAAAAAAAAI/AAAAAAAAAwU/4J6xIhZ3Xyo/s512-c/photo.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/-D4givbcIcDk/ThR8pRJ7q6I/AAAAAAAAAnE/XK_N9mvgmeg/s72-c/fusion.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-175198123326676839</id><published>2011-07-04T20:39:00.008+02:00</published><updated>2011-07-06T21:25:22.089+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Monica'/><category scheme='http://www.blogger.com/atom/ns#' term='text'/><title type='text'>Tag recommendation 
from an image</title><content type='html'>&lt;div style="text-align: justify;"&gt;In my latest contributions to this blog I talked about the use of &lt;a href="http://bitsearch.blogspot.com/2011/01/ranking-documents-based-on-tf-idf.html"&gt;TF-IDF as textual descriptor&lt;/a&gt; and the implementation of a &lt;a href="http://bitsearch.blogspot.com/2011/03/textual-similarity-graph.html"&gt;textual similarity graph&lt;/a&gt; from the metadata associated with an image. Last week I spoke to Xavi and we defined the tasks that I have to do this month. These include a system that suggests tags from an image.&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;The idea is: given an image search, find the most visually similar keyframes in the database. As we now have the new text descriptor we can represent each asset with a TF-IDF descriptor (remember that the available metadata are in general related to the whole asset, not to specific keyframes). From the textual descriptors associated with the assets of the retrieved keyframes, the system will search for the most representative terms and will suggest them to tag the keyframe or make a text query.&lt;br /&gt;&lt;br /&gt;This way, we exploit the same principle that guided my bachelor thesis: using automatically extracted visual descriptors from the keyframes and manually generated annotations at the asset scale to generate new annotations at the keyframe scale.&lt;br /&gt;&lt;/div&gt;&lt;br /&gt;Therefore, the first big problem to solve is how to determine which are the most representative terms among all the textual metadata associated with the assets that contain the most similar keyframes to the query image. Our first idea is to make the sum of the TF-IDF weight for each term of the descriptors. The system will obtain a ranked list sorted according to the sum of TF-IDF weights. 
Then it will suggest the first tags limited by a threshold or a maximum number of terms.&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/-JMVVmzwK43w/ThLbyL9cRbI/AAAAAAAAAL0/dRaqcVQAOjw/s1600/Imagen1.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 320px; height: 154px;" src="http://1.bp.blogspot.com/-JMVVmzwK43w/ThLbyL9cRbI/AAAAAAAAAL0/dRaqcVQAOjw/s320/Imagen1.png" alt="" id="BLOGGER_PHOTO_ID_5625800539789608370" border="0" /&gt;&lt;/a&gt;But we have to consider other alternatives. So in my next posts I will explain the solution chosen.&lt;br /&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-175198123326676839?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/175198123326676839/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/07/tag-recomendation-from-image.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/175198123326676839'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/175198123326676839'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/07/tag-recomendation-from-image.html' title='Tag recommendation from an image'/><author><name>Monica</name><uri>http://www.blogger.com/profile/04558100039934227329</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' 
url='http://1.bp.blogspot.com/-JMVVmzwK43w/ThLbyL9cRbI/AAAAAAAAAL0/dRaqcVQAOjw/s72-c/Imagen1.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-3842674815468448510</id><published>2011-07-04T16:57:00.009+02:00</published><updated>2011-07-04T19:20:15.520+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Manel'/><category scheme='http://www.blogger.com/atom/ns#' term='webservices'/><title type='text'>JSON and Java Representations for web data exchange</title><content type='html'>&lt;a href="http://www.json.org/img/json160.gif" onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}"&gt;&lt;img style="float:right; margin:0 0 10px 10px;cursor:pointer; cursor:hand;width: 160px; height: 160px;" src="http://www.json.org/img/json160.gif" border="0" alt="" /&gt;&lt;/a&gt;&lt;div style="text-align: justify;"&gt;All Java Web Services developed in Java by UPSeek group support XML data exchanges and recently have been adapted to work with&lt;a href="http://www.json.org/"&gt; JSON (JavaScript Object Notation)&lt;/a&gt;. Its simplicity has led to widespread use for web data exchange instead of XML representations because JSON is not a document markup language. XML carries a lot of baggage, and it doesn't match the data model of most programming languages. This change has meant a great advance for UPSeek because it is a lightweight and allows us to a more convenient communication with our partners.&lt;/div&gt;&lt;div style="text-align: center;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;With JSON in the system we got more quickly and easily deserialization (process in which a text represententation become a java class or vice versa) than with XML representation. 
It also expand the possibilities of WebServices to more than one company.&lt;/div&gt;&lt;div&gt;&lt;a href="http://wiki.fasterxml.com/JacksonInFiveMinutes"&gt;&lt;br /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div&gt;&lt;a href="http://wiki.fasterxml.com/JacksonInFiveMinutes"&gt;&lt;span class="Apple-style-span" style="color: rgb(0, 0, 0); -webkit-text-decorations-in-effect: none; "&gt;&lt;img src="http://photos1.meetupstatic.com/photos/event/4/9/8/5/global_30738821.jpeg" border="0" alt="" style="float: left; margin-top: 0px; margin-right: 10px; margin-bottom: 10px; margin-left: 0px; cursor: pointer; width: 164px; height: 170px; " /&gt;&lt;/span&gt;&lt;/a&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;With the new services &lt;a href="http://bitsearch.blogspot.com/search/label/webservices"&gt;published&lt;/a&gt; it was noted that the amount of data transferred was very high and we needed to use our own complex Java classes to run processes on the server. Because of the many attributes that make up these classes and, therefore, calls to the services were large strings, we decided to start using the &lt;a href="http://jackson.codehaus.org/"&gt;Jackson&lt;/a&gt;, a java library that simplifies this work task.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;Then, there are two types of responses offered by our services: XML representation or String representation (JSON) as the type of user or company that connects to the server.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-3842674815468448510?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/3842674815468448510/comments/default' title='Post Comments'/><link rel='replies' type='text/html' 
href='http://bitsearch.blogspot.com/2011/07/all-java-web-services-developed-in-java.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/3842674815468448510'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/3842674815468448510'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/07/all-java-web-services-developed-in-java.html' title='JSON and Java Representations for web data exchange'/><author><name>Manel</name><uri>http://www.blogger.com/profile/13861846623917028915</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-4196047827509726178</id><published>2011-06-30T22:46:00.012+02:00</published><updated>2011-07-24T20:01:53.022+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='evaluation'/><category scheme='http://www.blogger.com/atom/ns#' term='MPEG-7'/><category scheme='http://www.blogger.com/atom/ns#' term='object retrieval'/><category scheme='http://www.blogger.com/atom/ns#' term='xavi'/><title type='text'>Comparing MPEG-7 Color, Texture and Shape Descriptors for Region-based Image Retrieval</title><content type='html'>Today I am happy to announce the very first results which are to be included in my Phd thesis. These results aim at evaluating the individual performance of three different &lt;a href="http://en.wikipedia.org/wiki/Visual_descriptors"&gt;MPEG-7 visual descriptors&lt;/a&gt; computed on regions. 
These are the &lt;a href="http://mpeg.chiariglione.org/standards/mpeg-7/mpeg-7.htm#E12E22"&gt;Dominant Color&lt;/a&gt;, &lt;a href="http://makarandtapaswi.wordpress.com/2010/07/15/mpeg-7-texture-edge-histogram-descriptor/"&gt;Texture Edge Histogram&lt;/a&gt; and &lt;a href="http://www.springer.com/computer/image+processing/book/978-1-4020-1233-4"&gt;Curvature Scale Space (CSS) Shape&lt;/a&gt; descriptors. Each of these descriptors is defined by the MPEG-7 standard together with a recommended similarity metric. This metric was taken as a base and was &lt;a href="http://bitsearch.blogspot.com/2011/02/threshold-for-mpeg-7-dominant-color.html"&gt;fine-tuned&lt;/a&gt; to improve the obtained results.&lt;br /&gt;&lt;br /&gt;As an example, one of the experiments would consider a query region of an old Apple logo with very characteristic visual cues: six distinctive colors, a vertical edgy texture and a very recognizable shape for the logo (only the big segment was used; the leaf was discarded because only one-part queries are supported by the system right now).&lt;br /&gt;&lt;br /&gt;&lt;a href="http://1.bp.blogspot.com/-oS2Qu2XTOmU/TgzlH_x3PWI/AAAAAAAAAmM/jbkGigPgfIg/s1600/query.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 342px; height: 305px;" src="http://1.bp.blogspot.com/-oS2Qu2XTOmU/TgzlH_x3PWI/AAAAAAAAAmM/jbkGigPgfIg/s400/query.png" alt="" id="BLOGGER_PHOTO_ID_5624121960221326690" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;As a result, if three visual descriptors are available, there exist three different criteria to assess the similarity between the query region and the regions in the target database. 
The three obtained results are the following:&lt;br /&gt;&lt;br /&gt;&lt;span style="font-style: italic;"&gt;MPEG-7 Dominant Color&lt;/span&gt;s&lt;br /&gt;&lt;br /&gt;&lt;a href="http://4.bp.blogspot.com/-WvZOPNERdBY/Tixd9AzFlpI/AAAAAAAAAsE/wXlw9HHauCM/s1600/noCorrection-color.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 172px;" src="http://4.bp.blogspot.com/-WvZOPNERdBY/Tixd9AzFlpI/AAAAAAAAAsE/wXlw9HHauCM/s400/noCorrection-color.png" alt="" id="BLOGGER_PHOTO_ID_5632980536701326994" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;span style="font-style: italic;"&gt;MPEG-7 Contour Shape&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="http://1.bp.blogspot.com/-MLTBM_uQqvI/TisHu59UJaI/AAAAAAAAAr0/5tGqnNQ0E-E/s1600/noCorrection-shape.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 172px;" src="http://1.bp.blogspot.com/-MLTBM_uQqvI/TisHu59UJaI/AAAAAAAAAr0/5tGqnNQ0E-E/s400/noCorrection-shape.png" alt="" id="BLOGGER_PHOTO_ID_5632604261370308002" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;span style="font-style: italic;"&gt;MPEG-7 Edge Histogram&lt;/span&gt; (texture)&lt;br /&gt;&lt;br /&gt;&lt;a href="http://2.bp.blogspot.com/-J6UurSjbC3w/TisHu6vV7wI/AAAAAAAAAr8/MxJZD7ecNKU/s1600/noCorrection-texture.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 173px;" src="http://2.bp.blogspot.com/-J6UurSjbC3w/TisHu6vV7wI/AAAAAAAAAr8/MxJZD7ecNKU/s400/noCorrection-texture.png" alt="" id="BLOGGER_PHOTO_ID_5632604261580140290" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;The scores shown in these screen shots are normalized by the mean value of all distances when comparing all the elements in the dataset between each other.&lt;br /&gt;&lt;br /&gt;This is only the result that might be obtained with one query. 
In general, when the object of interest appears in the target image, this is correctly spotted among the other dozens of regions that are defined in the image. However, in some cases images that do not represent the query object are ranked better than others, so it is clear that the proposed system is not perfect at all.&lt;br /&gt;&lt;br /&gt;In order to obtain more representative results, a broader experiment was conducted on the &lt;a href="http://bitsearch.blogspot.com/2010/08/state-of-art-of-recognition-using.html"&gt;ETH Zurich Shape dataset&lt;/a&gt;, divided in five categories, using each defined object as a query to a database that contained the rest of the dataset. The query was formulated by &lt;a href="http://bitsearch.blogspot.com/2010/08/interactive-object-segmentation-with.html"&gt;matching&lt;/a&gt; the query object with the segments defined on a Binary Partition Tree (BPT), using this mapping as the region of support for the query. The visual descriptors were extracted from the query region and compared with the visual descriptors of the segments defined by the BPT of every image in the dataset. For each of them, the best matching region was identified and the similarity to the query was used as the score used to build a ranked list. So for every query a ranked list was built and its Average Precision computed, and finally a Mean Average Precision was obtained for each of the five categories. 
The obtained results were compared to &lt;a href="http://bitsearch.blogspot.com/2011/06/average-precision-of-random-ranked-list.html"&gt;random ranked list&lt;/a&gt; used as baseline and are shown in the following graph:&lt;br /&gt;&lt;br /&gt;&lt;a href="http://2.bp.blogspot.com/-x244xCVeiAo/ThiDkiZRneI/AAAAAAAAAqc/6jFt6UzH-Ys/s1600/MAPs.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 320px;" src="http://2.bp.blogspot.com/-x244xCVeiAo/ThiDkiZRneI/AAAAAAAAAqc/6jFt6UzH-Ys/s400/MAPs.png" alt="" id="BLOGGER_PHOTO_ID_5627392398130781666" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;Results indicate that the given name to the database, ETH Zurich SHAPE dataset is an accurate one, as the shape descriptor is the one that provides best results in almost all categories (in the case of mugs, color outperforms CSS Shape). The table also proves that the engine performs better than a random ranked list in all categories but for Swans, which presents many segmentation problems. However, the visual diversity of the objects in the dataset is an important limitation when searching by only considering the visual features of a single query region. 
Future experiments will aim at estimating how can the different cues be combined and whether a retrieval based on a previously trained classifier that would consider more visual diversity would improve the performance in terms of MAP.&lt;br /&gt;&lt;br /&gt;(reviewed on the 8th July 2011 with updated screen shots)&lt;br /&gt;(reviewed on the 9th Jul 2011 to introduce the graph of results all over the whole dataset)&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-4196047827509726178?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/4196047827509726178/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/06/comparing-mpeg-7-color-texture-and.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/4196047827509726178'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/4196047827509726178'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/06/comparing-mpeg-7-color-texture-and.html' title='Comparing MPEG-7 Color, Texture and Shape Descriptors for Region-based Image Retrieval'/><author><name>Xavi Giró-i-Nieto</name><uri>https://profiles.google.com/110271653716466055491</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh5.googleusercontent.com/--EaBZ1Bzzsg/AAAAAAAAAAI/AAAAAAAAAwU/4J6xIhZ3Xyo/s512-c/photo.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://1.bp.blogspot.com/-oS2Qu2XTOmU/TgzlH_x3PWI/AAAAAAAAAmM/jbkGigPgfIg/s72-c/query.png' height='72' 
width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-3498060494583459015</id><published>2011-06-29T23:11:00.010+02:00</published><updated>2011-06-30T17:48:18.729+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='evaluation'/><category scheme='http://www.blogger.com/atom/ns#' term='retrieval'/><category scheme='http://www.blogger.com/atom/ns#' term='xavi'/><title type='text'>Average Precision of a Random Ranked List</title><content type='html'>I have recently started &lt;a href="http://bitsearch.blogspot.com/2011/05/evaluation-of-local-detections-and.html"&gt;testing&lt;/a&gt; the retrieval algorithms developed for my thesis on the &lt;a href="http://www.vision.ee.ethz.ch/datasets/index.en.html"&gt;ETHZ shape dataset&lt;/a&gt;. These dataset contains a collection of 255 images that contain 289 instances of five different categories: Apple logos, bottles, giraffes, mugs, and swans.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://3.bp.blogspot.com/-ZTUVTGFb1p4/TguoAvuTgfI/AAAAAAAAAl4/7pSp9SO0TXI/s1600/ETHZ-elements.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 391px; height: 240px;" src="http://3.bp.blogspot.com/-ZTUVTGFb1p4/TguoAvuTgfI/AAAAAAAAAl4/7pSp9SO0TXI/s400/ETHZ-elements.png" alt="" id="BLOGGER_PHOTO_ID_5623773290466345458" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;As in any retrieval experiment, the results obtained is a ranked list of results according to the estimated relevance of each item in the test dataset when compared by a certain query. 
A typical measure for the quality of these ranked lists is the &lt;a href="http://en.wikipedia.org/wiki/Information_retrieval#Average_precision"&gt;average precision (AP)&lt;/a&gt;, which basically computes the &lt;a href="http://bitsearch.blogspot.com/2010/03/retrieval-systems-evaluation.html"&gt;precision&lt;/a&gt; of every relevant result obtained at the position where it appears in the generated ranked list.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://2.bp.blogspot.com/-JPoEbA8DPFs/TguoAjGip8I/AAAAAAAAAlw/HaVt_E_TiIk/s1600/AveragePrecision.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 333px; height: 47px;" src="http://2.bp.blogspot.com/-JPoEbA8DPFs/TguoAjGip8I/AAAAAAAAAlw/HaVt_E_TiIk/s400/AveragePrecision.png" alt="" id="BLOGGER_PHOTO_ID_5623773287078340546" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;When I started obtaining results for the techniques we have been developing, I started wondering what kind of results I should expect. An initial (and wrong) idea was that if the test dataset contained 5 categories, the base figure to beat would be a value of AP=0.2 for every category, as in a classification problem. But the results I obtained were not normally around this value and, in fact, they were category-dependent. These initial steps helped me realize that: a) my software was buggy and b) the random results that you would expect from a very buggy implementation were not generating uniform 0.2 AP values for every category.&lt;br /&gt;&lt;br /&gt;So, apart from working hard on debugging the software, I also started wondering what would be the AP for a random solution of a retrieval problem. Given that I had a computer to use, the first approach to the problem was to find an empirical answer to that question. 
So I just simply programmed the generation of random ranked lists and, in one of the trials, I obtained these results:&lt;br /&gt;&lt;br /&gt;&lt;a href="http://1.bp.blogspot.com/-4Ed5dirJGyE/TguoA3QUeaI/AAAAAAAAAmA/ZRaUi1XtEPU/s1600/Random-empirical.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 241px; height: 238px;" src="http://1.bp.blogspot.com/-4Ed5dirJGyE/TguoA3QUeaI/AAAAAAAAAmA/ZRaUi1XtEPU/s400/Random-empirical.png" alt="" id="BLOGGER_PHOTO_ID_5623773292488063394" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;They show that the AP between the five classes is related to the number of instances of every class. The more instances in the dataset, the higher expected AP in a random ranked list.&lt;br /&gt;&lt;br /&gt;The next question was then how this expected AP could be related to the number of instances. A first step was to build a toy example with a test dataset of only three elements belonging to two classes: one instance for class A and two instances for class B. 
A uniform random distribution of ranked lists could be composed as:&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;1.A, 2.B, 3.B&lt;/span&gt;&lt;br /&gt;AP(A) = 1&lt;br /&gt;AP(B)= 1/2 (1/2 + 2/3) = 7/12&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;1.B, 2.A, 3.B&lt;/span&gt;&lt;br /&gt;AP(A) = 1/2&lt;br /&gt;AP(B) = 1/2 (1 + 2/3) = 5/6&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;1.B, 2.B, 3.A&lt;/span&gt;&lt;br /&gt;AP(A) = 1/3&lt;br /&gt;AP(B) = 1/2 (1 + 1 ) = 1&lt;br /&gt;&lt;br /&gt;As a result, the expectation for the first class is E{ AP(A) } = 0.61 and for the second class is E{ AP(B) } = 0.81, which does not directly correspond to the proportion of elements of each class in the test set, which would be of 1/3 and 2/3 respectively.&lt;br /&gt;&lt;br /&gt;The remaining question is then, how to analytically compute the expectation of AP(x) where x is a class in a randomly generated ranked list?&lt;br /&gt;&lt;br /&gt;(to be continued and solved soon...)&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-3498060494583459015?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/3498060494583459015/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/06/average-precision-of-random-ranked-list.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/3498060494583459015'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/3498060494583459015'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/06/average-precision-of-random-ranked-list.html' title='Average Precision of a Random Ranked 
List'/><author><name>Xavi Giró-i-Nieto</name><uri>https://profiles.google.com/110271653716466055491</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh5.googleusercontent.com/--EaBZ1Bzzsg/AAAAAAAAAAI/AAAAAAAAAwU/4J6xIhZ3Xyo/s512-c/photo.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/-ZTUVTGFb1p4/TguoAvuTgfI/AAAAAAAAAl4/7pSp9SO0TXI/s72-c/ETHZ-elements.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-4174576105367329751</id><published>2011-06-22T15:41:00.000+02:00</published><updated>2011-06-22T15:42:18.995+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Martí'/><category scheme='http://www.blogger.com/atom/ns#' term='thesis'/><title type='text'>Results and conclusions</title><content type='html'>RESULTS&lt;br /&gt;&lt;br /&gt;I developed a launcher application that meets the HbbTV standard and is totally configurable by the programmer. I have also helped the Activa Multimedia team to improve their applications.&lt;br /&gt;&lt;br /&gt;CONCLUSIONS&lt;br /&gt;&lt;br /&gt;I have done some improvements and tests. I have also seen that HbbTV is a standard that needs more development, and broadcasters are not meeting the specification with all its requirements. &lt;br /&gt;In my opinion, this standard is more useful than MHP, so TVC, Telecinco and other broadcasters are developing applications for this new technology.&lt;br /&gt;&lt;br /&gt;PERSONAL CONCLUSIONS&lt;br /&gt;&lt;br /&gt;This experience has made me grow in two ways: the personal and the professional way. &lt;br /&gt;I also saw that working as a programmer is more interesting for me than I had ever thought. 
&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;You can read more about this project in&lt;a href="http://gps-tsc.upc.es/imatge/_Xgiro/start.html"&gt; my thesis document&lt;/a&gt;.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-4174576105367329751?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/4174576105367329751/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/06/results-and-conclusions.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/4174576105367329751'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/4174576105367329751'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/06/results-and-conclusions.html' title='Results and conclusions'/><author><name>Martí Alcon</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-3833426361825418295</id><published>2011-06-15T20:24:00.058+02:00</published><updated>2011-07-01T16:17:31.357+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='segmentation'/><category scheme='http://www.blogger.com/atom/ns#' term='Jaume'/><category scheme='http://www.blogger.com/atom/ns#' term='thesis'/><category scheme='http://www.blogger.com/atom/ns#' term='classification'/><category scheme='http://www.blogger.com/atom/ns#' term='erasmus'/><category scheme='http://www.blogger.com/atom/ns#' term='medical'/><title type='text'>Segmentation of the breast region with 
pectoral muscle suppression and automatic breast density classification</title><content type='html'>It has been long time since I wrote my last article because I was working in Belgium on my Master Thesis for the degree of Telecommunication Engineering. I would have preferred writing earlier but I was really busy first reading documentation, implementing the source code and performing the experiments. I've chosen to write now because I've just defended my thesis!&lt;div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify; "&gt;I've been working with &lt;a href="http://www.tele.ucl.ac.be/view-people.php?id=83"&gt;Benoit Macq&lt;/a&gt; and &lt;a href="http://www.tele.ucl.ac.be/view-people.php?id=99"&gt;Vincent Nicolas&lt;/a&gt; in the &lt;a href="http://www.uclouvain.be/en-icteam.html"&gt;ICTEAM&lt;/a&gt; of the Université Catholique de Louvain (&lt;a href="http://www.uclouvain.be/en-index.html"&gt;UCL&lt;/a&gt;), medical image processing field. My Masther Thesis was part of a big project called &lt;a href="http://www.tele.ucl.ac.be/view-project.php?name=MammoNote"&gt;MammoNote&lt;/a&gt;, a standard platform for helping the annotation of digital mammography and diagnostic of breast cancer.&lt;/div&gt;&lt;div style="text-align: justify; "&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify; "&gt;I would like to present a little bit what I have done during these months and the obtained results. My final goal was to implement a new method to segment and classify automatically &lt;a href="http://en.wikipedia.org/wiki/Mammography"&gt;mammograms&lt;/a&gt; according to their density. Lets catch up the subject a little bit and explain the need of this method.&lt;/div&gt;&lt;div style="text-align: justify; "&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify; "&gt;&lt;a href="http://en.wikipedia.org/wiki/Breast_cancer"&gt;Breast cancer&lt;/a&gt; is one of the major causes of death among women. 
Suspicious breast cancers appear as white spots in mammograms, indicating small clusters of micro-calcifications. Nowadays screening mammography is the most adopted technique to perform an early breast cancer detection. However, the accuracy of the computer-aided systems decreases due to some factors like density of the breast, presence of labels, artifacts or even pectoral muscle in the mammogram image. So we wanted to obtain a new image only with the presence of the breast without any other element that could decrease the obtained classification rate.&lt;/div&gt;&lt;div style="text-align: justify; "&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify; "&gt;So, my first objective, after reading all the documentation, was implementing a method to segment the breast from the rest of image regions that can be present in a mammography, basically: background, labels, artifacts and pectoral muscles. &lt;a href="http://3.bp.blogspot.com/-XQuc4-LzCK0/Tg3VI0Ych5I/AAAAAAAAAm8/bz553bpNx2w/s1600/Types_of_noise_mammography.png"&gt;&lt;br /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="font-style: italic;"&gt;Labels removal&lt;/span&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify; "&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify; "&gt;The label removal step used a region growing algorithm, from the ITK image processing library and implemented in C++, called &lt;span style="font-style: italic;"&gt;ConnectedThreshold&lt;/span&gt;. 
The base of these algorithms is grow from an initial point, normally inside the desired element to segment, and evaluate if the neighbourhood pixels if they should be considered as the desired object or not.&lt;/div&gt;&lt;div style="text-align: justify; "&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify; "&gt;The Insight Toolkit (&lt;a href="http://www.itk.org/"&gt;ITK&lt;/a&gt;) is an open-source platform used for performing registration (aligning and finding correspondence between data) and segmentation (the process of identifying and classifying data extracted from digitally sampled representation acquired from medical images as CT or MRI scanners) among other applications. It's a processing image library very used in medical applications and already used in the medical software implemented in our department. &lt;/div&gt;&lt;div style="text-align: justify; "&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify; "&gt;We can find 4 different type of noise that appear in these mammogram images:&lt;/div&gt;&lt;div style="text-align: justify; "&gt;&lt;ul&gt;&lt;li&gt;High intensity label.&lt;/li&gt;&lt;li&gt;Low intensity label.&lt;/li&gt;&lt;li&gt;Scanning artifact.&lt;/li&gt;&lt;li&gt;Tape artefict.&lt;/li&gt;&lt;/ul&gt;In the following figure we can see an example of each one:  &lt;/div&gt;&lt;div style="text-align: justify; "&gt;&lt;div style="text-align: justify; "&gt;&lt;br /&gt;&lt;/div&gt;&lt;a href="http://3.bp.blogspot.com/-XQuc4-LzCK0/Tg3VI0Ych5I/AAAAAAAAAm8/bz553bpNx2w/s1600/Types_of_noise_mammography.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 373px; height: 400px;" src="http://3.bp.blogspot.com/-XQuc4-LzCK0/Tg3VI0Ych5I/AAAAAAAAAm8/bz553bpNx2w/s400/Types_of_noise_mammography.png" alt="" id="BLOGGER_PHOTO_ID_5624385857132464018" border="0" /&gt;&lt;/a&gt;&lt;a 
href="http://3.bp.blogspot.com/-yrXIhqzZsF4/Tg3BiICLR5I/AAAAAAAAAIE/XYc3qnCZmFk/s1600/Types%2Bof%2Bnoise%2Bmammography.png"&gt;&lt;br /&gt;&lt;/a&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: left;"&gt;So our objective in this step is achieve to remove all of them from our mammogram images. The obtained results were almost a perfect rate of removal except for one scanning artifact. We can see them in the following figure where in the first image appears the breast with all the noises present and in the second one with all of them removed:&lt;/div&gt;&lt;/div&gt;&lt;div style="text-align: justify; "&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify; "&gt;&lt;a href="http://2.bp.blogspot.com/-darRRdonnrM/TgtVZ1zZwWI/AAAAAAAAAkU/lSqtrhSxEzI/s1600/img_results_labels.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 242px;" src="http://2.bp.blogspot.com/-darRRdonnrM/TgtVZ1zZwWI/AAAAAAAAAkU/lSqtrhSxEzI/s400/img_results_labels.png" alt="" id="BLOGGER_PHOTO_ID_5623682462129963362" border="0" /&gt;&lt;/a&gt; &lt;/div&gt;&lt;div&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-kkXg0Y5wntc/TgR7Rr2mrgI/AAAAAAAAAHA/-wvRryXL_Fo/s1600/img_results_labels.jpg"&gt;&lt;/a&gt;&lt;div style="text-align: center;"&gt;&lt;br /&gt;&lt;div style="text-align: left;"&gt;&lt;span style="font-style: italic;"&gt;Pectoral muscle removal&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;/div&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;The following step, remove the pectoral muscle from the mammogram images, was a more complicated task because of different dimensions, shapes and pixel intensities from the muscles in the images. The &lt;a href="http://en.wikipedia.org/wiki/Hough_transform"&gt;Hough transform&lt;/a&gt; was used, a common used technique to detect geometrical shapes in images. 
In our case, we wanted to detect the straight line that separates the muscle from the breast and apply a mask to keep the image without it. It was also necessary to apply some pre-processing procedures to the images like smoothing, finding the orientation, extract a region of interest that contains the pectoral muscle and apply an edge detection filter. In the following figures we can see some examples of the technique and the obtained results:&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;div&gt;&lt;span class="Apple-style-span"&gt;&lt;u&gt;&lt;br /&gt;&lt;br /&gt;&lt;/u&gt;&lt;/span&gt;&lt;a href="http://4.bp.blogspot.com/-S9JMrptyb4s/TgtVRncsV8I/AAAAAAAAAj8/V8MBHI3VaZA/s1600/hough.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 242px;" src="http://4.bp.blogspot.com/-S9JMrptyb4s/TgtVRncsV8I/AAAAAAAAAj8/V8MBHI3VaZA/s400/hough.png" alt="" id="BLOGGER_PHOTO_ID_5623682320837662658" border="0" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;span class="Apple-style-span" style="color: rgb(0, 0, 0); "&gt;&lt;br /&gt;&lt;span style="font-style: italic;"&gt;Density classification&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;Then we decided to continue with the density classification task. The used procedure has its base in a previous work and I have adapted to the actual study case as well as I also added some new features like a cross validation method, using texture features in the feature vector or presenting the results as recognition rates and confusion matrices. 
&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;The classification algorithm extracts a large number of random sub-windows, possibly overlapping, at random positions and random size. Then these sub-windows are resized to a 16x16 pixels and it is created a feature vector only using the pixel values. One of the improvements was adding other values to this vector, like texture features as skewness, kurtosis or smoothness. &lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;Then, with the extracted sub-windows of the train images, after a process of 10-fold cross-validation, the algorithm trains a classifier based on an ensemble of &lt;a href="http://en.wikipedia.org/wiki/Decision_tree"&gt;decision trees&lt;/a&gt; using random thresholds and splitting the nodes until reaching the final class. &lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;For the testing step, the extracted sub-windows from the test images are propagated through the trees and the probability to belong to the class is averaged for all the samples and for each tree finding the final image class.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;We have done a lot of experiments, varying several parameters, to find the ones that gave us the best recognition rate. 
Finally we found that the best results were achieved using the whole original image, sampling the images with as much sub-windows as possible, using a minimum number of elements in a node to initiate an split equal to 4 and adding texture features to the vector.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;The results are quite promising because we have achieved the best recognition rate compared with our documentation referred to the current state of the art. In the following images we can see our best results adding or not texture features and a chart showing the different used methods and their corresponding rates.&lt;br /&gt;&lt;br /&gt;&lt;/div&gt;&lt;a href="http://3.bp.blogspot.com/-ocXpyttGhwU/TgtVS0CKYoI/AAAAAAAAAkM/nkNtjxz00no/s1600/results.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 164px;" src="http://3.bp.blogspot.com/-ocXpyttGhwU/TgtVS0CKYoI/AAAAAAAAAkM/nkNtjxz00no/s400/results.png" alt="" id="BLOGGER_PHOTO_ID_5623682341395915394" border="0" /&gt;&lt;/a&gt;&lt;div style="text-align: justify;"&gt;&lt;div style="text-align: justify; "&gt;&lt;span class="Apple-style-span" style="font-style: italic; "&gt;References&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify; "&gt;&lt;span class="Apple-style-span" style="font-style: italic; "&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify; "&gt;&lt;div style="text-align: justify; "&gt;[1] K. Bovis and S. Singh. Classification of mammographic breast density using a combined classifier paradigm.&lt;/div&gt;&lt;div style="text-align: justify; "&gt;[2] S. Tzikopoulos, H. Georgiou, M. Mavroforakis, N. Dimitropoulos, and S. Theodoridis. A fully automated complete segmentation scheme for mammograms.&lt;/div&gt;&lt;div style="text-align: justify; "&gt;&lt;div style="text-align: justify; "&gt;[3] M. Mustra, M. Grgic, and K. Delac. 
Feature selection for automatic breast density classification.&lt;/div&gt;&lt;div style="text-align: justify; "&gt;[4] T. MacGahan, M. R. Pacheco, and A. Wong. A hybrid approach to automatic mammography classification.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-style: italic; "&gt;Conclusions&lt;/span&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span"&gt;To conclude this article I want to give my opinion about the Erasmus exchange. I think it's a very positive experience to go abroad and make the Master final Thesis because besides of learning other language, French in my case, you soak up the culture of the country, travel a lot, meet new people and have fun!&lt;/span&gt;&lt;/div&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-3833426361825418295?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/3833426361825418295/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/06/master-thesis-segmentation-of-breast.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/3833426361825418295'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/3833426361825418295'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/06/master-thesis-segmentation-of-breast.html' title='Segmentation of the breast region with pectoral muscle suppression and automatic breast density 
classification'/><author><name>Jaume</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/-XQuc4-LzCK0/Tg3VI0Ych5I/AAAAAAAAAm8/bz553bpNx2w/s72-c/Types_of_noise_mammography.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-6379654346551739819</id><published>2011-06-12T16:35:00.006+02:00</published><updated>2011-07-04T22:38:37.649+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='thesis'/><category scheme='http://www.blogger.com/atom/ns#' term='Marcel'/><title type='text'>Interactive Image processing demos for the web with Wt</title><content type='html'>&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-L1nU7z8QKh8/ThIkustpSWI/AAAAAAAAAD0/1d8Q_0qm1mU/s1600/foto.JPG"&gt;&lt;br /&gt;&lt;/a&gt;&lt;br /&gt;After four months of hard work, I am proud to present the results of my thesis, which are a framework, some utilities for this framework(called widgets) and a little web demonstration.&lt;br /&gt;&lt;br /&gt;To do a little remembering, I was trying to demonstrate the UPC Image and Video processing group algorithms from ImagePlus, a software platform from them , to the web. You can follow the main stream of the project by reading &lt;a href="http://bitsearch.blogspot.com/search/label/Marcel"&gt;all my articles&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;Speaking about this framework, it is not very easy to create a web page and even less in C++. 
The main advantage of the web framework I have built is that you can create a web page like this in less than 3 lines of code.&lt;br /&gt;&lt;br /&gt;By doing that, you get an entire web page with its header, its logos,footer and so on.&lt;br /&gt;&lt;br /&gt;This approach represents an improvement for programmers in the UPC Image and Video processing groups, who did not even use graphical interfaces for their demonstrations. Now, they are able to create web demonstrations in a fast and easy way, without learning web technology (PHP, HTML, javascript, etc.)&lt;br /&gt;&lt;br /&gt;By doing that then, you get a whole void interface.&lt;br /&gt;&lt;a href="http://3.bp.blogspot.com/-UnK9DPJ9TBc/TfTX--zNslI/AAAAAAAAADs/vBCVxsWWrQg/s1600/void.jpg"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 273px;" src="http://3.bp.blogspot.com/-UnK9DPJ9TBc/TfTX--zNslI/AAAAAAAAADs/vBCVxsWWrQg/s400/void.jpg" alt="" id="BLOGGER_PHOTO_ID_5617352112247517778" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;Furthermore, I developed some utilities to use with the web demonstration. My advisors and I thought that It was too difficult to render a image in the web, using the main technology used in this project, Wt. (If you want to know what is it, please go to my last articles where I spoke about it)&lt;br /&gt;&lt;br /&gt;Then I created an intermediate layer, some utilities that automatically call Wt functions and do the usual work, like render a image from ImagePlus to the web. Now, to render an image you just have to create an object of my class, "WebImage" and use the "paint" method. Quite easy, or at least, easier than before...&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;The other utility I've made is a Bibliographic zone in the interface. 
The utility just displays a formatted bibliographic text given 4 arguments which are the authors, the name of the article, the publication and the URL.&lt;br /&gt;&lt;br /&gt;I also reserved a place for bibliographic texts in my interface, and I made it easier to put bibliographic texts, just writing a line of code for each reference.&lt;br /&gt;&lt;br /&gt;And finally I made a web demonstration of an ImagePlus algorithm. It is called &lt;a href="http://bitsearch.blogspot.com/2010/02/binary-partition-trees.html"&gt;BPT&lt;/a&gt; and consists on creating a tree by joining the littlest regions to form bigger regions, until there is just one region which is the whole image.&lt;br /&gt;&lt;br /&gt;This demonstration contained lots of things, an slider to navigate for that tree, a wheel event which actually does the same of the slider, the original image, the image which is being processed, a file uploader and a combo box to select the image from the server. Also a button to show contours of every region.&lt;br /&gt;&lt;br /&gt;Watch it by yourselves.&lt;br /&gt;&lt;br /&gt;&lt;object width="320" height="266" class="BLOG_video_class" id="BLOG_video-b52b09bf8966b2b7" classid="clsid:D27CDB6E-AE6D-11cf-96B8-444553540000" codebase="http://download.macromedia.com/pub/shockwave/cabs/flash/swflash.cab#version=6,0,40,0"&gt;&lt;param name="movie" value="http://www.youtube.com/get_player"&gt;&lt;param name="bgcolor" value="#FFFFFF"&gt;&lt;param name="allowfullscreen" value="true"&gt;&lt;param name="flashvars" 
value="flvurl=http://v20.nonxt7.googlevideo.com/videoplayback?id%3Db52b09bf8966b2b7%26itag%3D5%26app%3Dblogger%26ip%3D0.0.0.0%26ipbits%3D0%26expire%3D1331641300%26sparams%3Did,itag,ip,ipbits,expire%26signature%3DEAF4D41DAF699E7B8646BD50947ACE2F93ECC0C.5470E021C5C3052BC5EB9F06428638AE71303DC7%26key%3Dck1&amp;amp;iurl=http://video.google.com/ThumbnailServer2?app%3Dblogger%26contentid%3Db52b09bf8966b2b7%26offsetms%3D5000%26itag%3Dw160%26sigh%3D42YipDiuev13CF2ByYrxv-Fa668&amp;amp;autoplay=0&amp;amp;ps=blogger"&gt;&lt;embed src="http://www.youtube.com/get_player" type="application/x-shockwave-flash" width="320" height="266" bgcolor="#FFFFFF" flashvars="flvurl=http://v20.nonxt7.googlevideo.com/videoplayback?id%3Db52b09bf8966b2b7%26itag%3D5%26app%3Dblogger%26ip%3D0.0.0.0%26ipbits%3D0%26expire%3D1331641300%26sparams%3Did,itag,ip,ipbits,expire%26signature%3DEAF4D41DAF699E7B8646BD50947ACE2F93ECC0C.5470E021C5C3052BC5EB9F06428638AE71303DC7%26key%3Dck1&amp;iurl=http://video.google.com/ThumbnailServer2?app%3Dblogger%26contentid%3Db52b09bf8966b2b7%26offsetms%3D5000%26itag%3Dw160%26sigh%3D42YipDiuev13CF2ByYrxv-Fa668&amp;autoplay=0&amp;ps=blogger" allowFullScreen="true" /&gt;&lt;/object&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;Main contributions and results&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;Because of the length of this article I will try to summarize the conclusions of the thesis.&lt;br /&gt;The first conclusion I get from this project is that now people from UPC Image and Video processing group are able to do web demonstrations and show them online, so they now can be seen all over the world.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;At the end, there is a framework which allows you to create a web interface in a few lines of code, two widgets, to render an image and to format a bibliography very easily. 
Also there is the web demonstration which you can see here.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://imatge.upc.edu:8080/"&gt;demonstration online&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;Mobile devices and future work&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;Besides, they don't have to learn web technology which makes it even easier.&lt;br /&gt;The next thing we can think about is mobile demonstration, because in this architecture that we are using the client (phone or computer) does not work too much, the server is the one that takes care of everything, or almost everything, so we can think about mobile demonstrations by adapting all the layout to mobile devices.&lt;br /&gt;&lt;br /&gt;And finally, other types of widgets and a lot of different things can be integrated in the demonstrator and in the developer's demonstrations. Really, I expect to see soon some demonstration from someone else.&lt;br /&gt;&lt;br /&gt;If you want to see&lt;a href="http://gps-tsc.upc.es/imatge/_Xgiro/teaching/thesis/2010-2011/MarcelTella/memoria.pdf"&gt; the whole document&lt;/a&gt;, you can go to&lt;a href="http://gps-tsc.upc.es/imatge/_Xgiro/start.html"&gt; Xavi's professional web page&lt;/a&gt; where you can find my thesis as well as my friends Martí and Eli's theses which have been directed by Xavi this spring.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;And last but not least, the oral presentation was good, I explained everything in my own words. At the beginning I was kind of nervous, but afterwards everything went well. 
I was and actually I am very happy because I got a "distinction"(10).&lt;br /&gt;&lt;br /&gt;&lt;a href="http://4.bp.blogspot.com/-L1nU7z8QKh8/ThIkustpSWI/AAAAAAAAAD0/1d8Q_0qm1mU/s1600/foto.JPG"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 299px;" src="http://4.bp.blogspot.com/-L1nU7z8QKh8/ThIkustpSWI/AAAAAAAAAD0/1d8Q_0qm1mU/s400/foto.JPG" alt="" id="BLOGGER_PHOTO_ID_5625599269234362722" border="0" /&gt;&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-6379654346551739819?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/6379654346551739819/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/06/algorithm-demonstrator-results-and.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/6379654346551739819'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/6379654346551739819'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/06/algorithm-demonstrator-results-and.html' title='Interactive Image processing demos for the web with Wt'/><author><name>Marcel</name><uri>http://www.blogger.com/profile/06357583442533980245</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/-UnK9DPJ9TBc/TfTX--zNslI/AAAAAAAAADs/vBCVxsWWrQg/s72-c/void.jpg' height='72' 
width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-8446720884444356637</id><published>2011-05-07T17:27:00.010+02:00</published><updated>2011-05-07T18:45:42.775+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='segmentation'/><category scheme='http://www.blogger.com/atom/ns#' term='evaluation'/><category scheme='http://www.blogger.com/atom/ns#' term='object'/><category scheme='http://www.blogger.com/atom/ns#' term='detection'/><category scheme='http://www.blogger.com/atom/ns#' term='xavi'/><title type='text'>Evaluation of object detections and segmentations</title><content type='html'>I am currently evaluating the algorithms I have developed for automatic object detection and segmentation. As my techniques are based on hierarchical partitions of the image, I have taken a similar work from &lt;a href="http://bitsearch.blogspot.com/2010/08/state-of-art-of-recognition-using.html"&gt;Gu, Lim, Arblelaz and Malik&lt;/a&gt; (Berkeley) as a reference. In their paper they used the &lt;a href="http://www.vision.ee.ethz.ch/datasets/index.en.html"&gt;ETHZ shape dataset&lt;/a&gt;, which has been proven a good first test because its size is small (256 images belonging to 5 different categories) and provides a segmentation ground truth for evaluation. 
Two types of experiments can be run on these data: object detection and segmentation.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://4.bp.blogspot.com/-VR_PvdtWHzw/TcV2CIT0jjI/AAAAAAAAAhw/l7MdGZZOi18/s1600/DetectionSegmentation.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 95px;" src="http://4.bp.blogspot.com/-VR_PvdtWHzw/TcV2CIT0jjI/AAAAAAAAAhw/l7MdGZZOi18/s400/DetectionSegmentation.png" alt="" id="BLOGGER_PHOTO_ID_5604015090294885938" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;In the object detection case, the goal is to determine if an object of a certain category is represented by an image and, if so, provide a rough estimation of its location. The location is expressed through the bounding box and a detection is considered correct if the bounding boxes of the detected object and the annotated ground truth overlap on a ratio higher of 0.5 when compared to the union of two boxes. This criterion is known as the &lt;a href="http://www.vision.ee.ethz.ch/%7Ecalvin/ethz_pascal_stickmen/downloads/README.txt"&gt;Pascal criterion&lt;/a&gt;, as it is the one used in the &lt;a href="http://pascallin.ecs.soton.ac.uk/challenges/VOC/"&gt;Pascal Visual Object Classes Challenge&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://4.bp.blogspot.com/-_XUubOh3DLg/TcV2DSnXh0I/AAAAAAAAAiI/TZ_ahtFcFwk/s1600/PascalCriterion.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 108px;" src="http://4.bp.blogspot.com/-_XUubOh3DLg/TcV2DSnXh0I/AAAAAAAAAiI/TZ_ahtFcFwk/s400/PascalCriterion.png" alt="" id="BLOGGER_PHOTO_ID_5604015110241093442" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;As this is a detection problem, the decision of whether the object is present or not is usually dependent of a certain threshold that sets a compromise between the precision and the recall of the detections. 
For this reason, providing a detection measure is somewhat tricky, as results must take into account this balance. To solve this issue, detection rates (recall) are given together with their associated False Positive detections Per Image (FPPI). By doing so, the detection experiment can be run at different thresholds, which will provide different detection rate-FPPI pairs.&lt;br /&gt;&lt;br /&gt;Unfortunately it is highly improbable that the output of an experiment will exactly provide the reference FPPI that is to be measured. So the result of running the detection algorithm at different thresholds will be a set of points that are to be interpolated to find an estimation of the detection rate at the desired FPPI. In my case that was laborious, as I had to try different detection thresholds smartly, generating measures as close to the FPPI as possible so that the interpolation of the detection rate was as accurate as possible.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://4.bp.blogspot.com/-5fIgXgejfj8/TcV2CTgrTAI/AAAAAAAAAh4/Z2JgB6ZmaZY/s1600/DetectionVsFppi.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 108px;" src="http://4.bp.blogspot.com/-5fIgXgejfj8/TcV2CTgrTAI/AAAAAAAAAh4/Z2JgB6ZmaZY/s400/DetectionVsFppi.png" alt="" id="BLOGGER_PHOTO_ID_5604015093301595138" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;Moreover, as in any classification problem, experiments must be run on different train and test partitions of the annotated dataset. I followed the proposed scheme of estimating the detection rate of every category at 0.3 FPPI for five trials. The partition was generated randomly, as Eli &lt;a href="http://bitsearch.blogspot.com/2011/04/dataset-generation.html"&gt;explained&lt;/a&gt; last week. 
As a result, a mean detection rate and its deviation were computed for every category which, finally, are also averaged to provide a single result.&lt;br /&gt;&lt;br /&gt;Once the detection performance is evaluated, the second problem can be handled: how to evaluate the quality of the segmentation? This second part is not that clear in the reference paper, so I made some guesses that I might need to modify in the future. To my understanding, the evaluation of the segmentation should be separate from the detection, so I decided to evaluate the quality of the segmentation only on those detections considered correct by applying the Pascal criterion.&lt;br /&gt;&lt;a href="http://2.bp.blogspot.com/-ERNrfw5dTQs/TcV2DkKM2AI/AAAAAAAAAiQ/m9IO4PxIjFY/s1600/Segmentation.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 89px;" src="http://2.bp.blogspot.com/-ERNrfw5dTQs/TcV2DkKM2AI/AAAAAAAAAiQ/m9IO4PxIjFY/s400/Segmentation.png" alt="" id="BLOGGER_PHOTO_ID_5604015114950596610" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;If only considering correct detections in terms of bounding box, the object segmentation can be evaluated at the pixel scale. The object detection segments the image pixels in two classes: &lt;span style="font-style: italic;"&gt;positive&lt;/span&gt; when representing the object, &lt;span style="font-style: italic;"&gt;negative&lt;/span&gt; when not assigned to the object. If a pixel-wise truth is available, the amount of true positives as well as true and false negatives can be counted. 
These measures can be used to compute the precision and recall of the object segmentation, and can also be combined in a single measure as the &lt;a href="http://sve.loni.ucla.edu/instructions/metrics/jaccard/"&gt;Jaccard Similarity Index&lt;/a&gt;, as Neus &lt;a href="http://bitsearch.blogspot.com/2010/08/methodology-for-evaluating-interactive.html"&gt;presented&lt;/a&gt; when evaluating interactive segmentation. However, in the Gu et al work they seem to only consider the precision, so I took the same criteria for comparison purposes.&lt;br /&gt;&lt;a href="http://3.bp.blogspot.com/-Y429BG8A4Tk/TcV3DetS6PI/AAAAAAAAAiY/WmNaAIRbx7U/s1600/JaccardIndex.gif"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 298px; height: 207px;" src="http://3.bp.blogspot.com/-Y429BG8A4Tk/TcV3DetS6PI/AAAAAAAAAiY/WmNaAIRbx7U/s400/JaccardIndex.gif" alt="" id="BLOGGER_PHOTO_ID_5604016212998809842" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;The measures are ready now but I still need to improve the generation as the ground truth, as I am currently considering the masks generated by &lt;a href="http://bitsearch.blogspot.com/2010/08/rectangle-expansion-and-inclusion.html"&gt;mapping the ground truth bounding box&lt;/a&gt; onto the image segmentation generated with the &lt;a href="http://bitsearch.blogspot.com/2010/04/interactive-segmentation-studying.html"&gt;Binary Partition Tree&lt;/a&gt; technique developed by &lt;a href="http://directori.upc.edu/directori/dadesPersona.jsp?id=1059234"&gt;Jordi Pont-Tuset&lt;/a&gt;. Although results are &lt;a href="http://bitsearch.blogspot.com/2010/08/interactive-object-segmentation-with.html"&gt;reasonable&lt;/a&gt;, I will introduce two modifications: firstly use the accurate contour also included in the ground truth data instead of the bounding box to map it into my image segmentation. 
Secondly, compare the measures with the actual mask in the ground truth, which will also provide an estimation of the overall performance of the system: Jordi's image segmentation and my object detection+segmentation.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-8446720884444356637?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/8446720884444356637/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/05/evaluation-of-local-detections-and.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/8446720884444356637'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/8446720884444356637'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/05/evaluation-of-local-detections-and.html' title='Evaluation of object detections and segmentations'/><author><name>Xavi Giró-i-Nieto</name><uri>https://profiles.google.com/110271653716466055491</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh5.googleusercontent.com/--EaBZ1Bzzsg/AAAAAAAAAAI/AAAAAAAAAwU/4J6xIhZ3Xyo/s512-c/photo.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/-VR_PvdtWHzw/TcV2CIT0jjI/AAAAAAAAAhw/l7MdGZZOi18/s72-c/DetectionSegmentation.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-7071065804488737249</id><published>2011-04-21T16:32:00.008+02:00</published><updated>2011-04-21T16:59:11.237+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' 
term='evaluation'/><category scheme='http://www.blogger.com/atom/ns#' term='Eli'/><category scheme='http://www.blogger.com/atom/ns#' term='classification'/><title type='text'>Dataset generation</title><content type='html'>&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;In this post I will discuss two different techniques for dataset generation that have been studied and implemented for my project on semantic shot detection: modified K-fold cross-validation and repeated random sub-sampling&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;For testing a multiclass classifier, the annotated positive and negative instances of all classes have to be split into a trainset and a testset so the trainer and the detector use different sample data for the experiment. Because of the arquitecture of the classifier used in my project, based on binary classifiers, each class annotations are partitioned in order to train and test each semantic shot classifier.&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;In this case, as there were few labeled instances, 80% of the each class sample data has been used to train its classifier while the 20% left has been used for detection. 
As there are classes with a few positive instances and many more negative instances (all the positive instances of the other classes), each percentage has been applied separately to positive and negative instances, ensuring at least some positive instances to train the system.&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;All instances belonging to the trainset clutter class have been deleted so the trainer does not include the class when generating the model, but the detector can measure its performance when detecting.&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;&lt;span class="Apple-style-span"&gt;Modified K-fold cross-validation&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;The K-fold cross-validation algorithm generates K different data subsets, making sure all data is used for both training and detection for each iteration. As a modification of the K-fold cross-validation, where data are partitioned into K equal parts and the ratio between the sizes of the different subsets is not defined by the user, the parameter K has been calculated automatically to build the testset and trainset for the given trainset percentage, allowing more flexibility. 
This procedure is repeated K times and the predictions of the K datasets are averaged.&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;The whole procedure can be done several times with aleatory data order for each iteration. The following picture illustrates one iteration of the process:&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;img src="http://4.bp.blogspot.com/-lC0xvoM94hs/TbBCqfLsJhI/AAAAAAAAADM/Tm5shi_67Fk/s400/Kfold.jpg" style="text-align: justify;display: block; margin-top: 0px; margin-right: auto; margin-bottom: 10px; margin-left: auto; cursor: pointer; width: 247px; height: 400px; " border="0" alt="" id="BLOGGER_PHOTO_ID_5598047634513995282" /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;&lt;span class="Apple-style-span"&gt;Repeated random sub-sampling&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;Random sub-sampling consists of sorting the data in an aleatory order and then dividing it into the dataset and trainset for the given percentages. For each iteration a single trainset and dataset is built. 
As in the K-fold cross-validation the algorithm has been applied separately to each class to train each classifier.&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;&lt;span class="Apple-style-span"&gt;Conclusions &lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;Random-subsampling does not ensure all instances are both used for training and detection, but more iterations can be done with less time, as random sub-sampling represents 1-fold iteration of our modified cross-validation. Predictions using this method give a realistic estimation of the classifier performance with external validation data, while cross-validation usually gives overly optimistic estimations.&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;Because of the time cost of the K-fold cross-validation, the diversity of the datasets that already offers random sub-sampling and its realistic estimation, the repeated random sub-sampling algorithm has been chosen for the experiment dataset builder. 
&lt;/span&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-7071065804488737249?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/7071065804488737249/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/04/dataset-generation.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/7071065804488737249'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/7071065804488737249'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/04/dataset-generation.html' title='Dataset generation'/><author><name>Eli Carcel</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/-lC0xvoM94hs/TbBCqfLsJhI/AAAAAAAAADM/Tm5shi_67Fk/s72-c/Kfold.jpg' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-7970319040435043355</id><published>2011-04-21T11:33:00.007+02:00</published><updated>2011-04-21T11:55:25.569+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='conference'/><category scheme='http://www.blogger.com/atom/ns#' term='Monica'/><category scheme='http://www.blogger.com/atom/ns#' term='retrieval'/><category scheme='http://www.blogger.com/atom/ns#' term='xavi'/><title type='text'>Reranking at the ACM ICMR in Trento</title><content type='html'>Last Monday I had the chance to present my joint work with &lt;a 
href="http://bitsearch.blogspot.com/search/label/Monica"&gt;Monica Alfaro Vendrell&lt;/a&gt; and Ferran Marques Acosta in the &lt;a href="http://www.icmr2011.org/"&gt;ACM ICMR 2011&lt;/a&gt; held in Trento. This is a new conference resulting of the fusion of the (now deprecated) CIVR and MIR.&lt;br /&gt;&lt;br /&gt;Our contribution proposes some modifications to the classic solution for image ranking based on the &lt;a href="http://bitsearch.blogspot.com/2010/07/random-walk-step-by-step.html"&gt;random walk&lt;/a&gt;, originally proposed by &lt;a href="http://doi.acm.org/10.1145/1291233.1291446"&gt;Hsu, Kennedy and Chang&lt;/a&gt; in 2007. As Monica &lt;a href="http://bitsearch.blogspot.com/2010/08/filtering-similarity-graph-for.html"&gt;reported&lt;/a&gt; in this same blog last Fall, we introduced two types of filtering in the Similarity Graph to boost diversity while keeping the relevance estimated during the random walk. You can see the poster we prepared in the next figure.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://1.bp.blogspot.com/-j4SJ1Hb4Qxc/Ta_7xhAlMHI/AAAAAAAAAgs/Bp8H8h8OKVY/s1600/2011-giro-icmr.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 283px; height: 400px;" src="http://1.bp.blogspot.com/-j4SJ1Hb4Qxc/Ta_7xhAlMHI/AAAAAAAAAgs/Bp8H8h8OKVY/s400/2011-giro-icmr.png" alt="" id="BLOGGER_PHOTO_ID_5597969689937784946" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;Two additional proposals were introduced in the paper: a feature fusion based on node probabilities instead of feature similarities, and a new metric to measure the diversity of video assets. This last contribution was introduced to adapt the classic S-recall measure to our scenario, where every keyframe is always associated to one (and only one) asset.&lt;br /&gt;&lt;br /&gt;I received several questions and suggestions about our work. 
The first thing I realized is that many attendees called the random walk algorithm as Pagerank, the famous adaptation of the technique at Google to rank the search results. An interesting proposal came from &lt;a href="http://www.eurecom.fr/%7Ehuet/"&gt;Benoit Huet&lt;/a&gt; (EURECOM), who suggested that in order to prove the impact of the probabilities fusion compared to the similarity ones we could compare the two approaches in our experiments. He also wondered whether the diversity measures we obtained were related to the types of queries we considered and, to be more precise, on the amount of relevant assets.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://4.bp.blogspot.com/-N19vWv_AF-4/Ta_-94t3g1I/AAAAAAAAAhE/bUyWgoMg_LI/s1600/poster.JPG"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 298px;" src="http://4.bp.blogspot.com/-N19vWv_AF-4/Ta_-94t3g1I/AAAAAAAAAhE/bUyWgoMg_LI/s400/poster.JPG" alt="" id="BLOGGER_PHOTO_ID_5597973200995058514" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;Finally some other visitors wondered what would come next. On the short term, next July Monica will &lt;a href="http://bitsearch.blogspot.com/2011/03/textual-similarity-graph.html"&gt;introduce textual features&lt;/a&gt; through the TF-IDF descriptor extraction she implemented last Winter. So for sure the evaluation of the gain by adding textual features will be a must experiment. If trying to go one step beyond, we have long discussed about introducing the mutual reinforcement approach &lt;a href="http://dx.doi.org/10.1145/1816041.1816048"&gt;proposed last Summer at CIVR 2010&lt;/a&gt; by Ting Yao, Tao Mei and &lt;a href="http://www.cs.cityu.edu.hk/%7Ecwngo/"&gt;Chong-Wah Ngo&lt;/a&gt; from &lt;a href="http://www.cs.cityu.edu.hk/%7Ecwngo/"&gt;VIREO&lt;/a&gt; at City University of Hong Kong. 
Their work has continued with a new contribution presented also at ACM ICMR by &lt;a href="http://www.cs.cityu.edu.hk/%7Ehktan/"&gt;Hung-Khoon Tan&lt;/a&gt; and Prof. Ngo. In this paper they explore different fusion strategies in the context of a similarity graph for every different feature.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://4.bp.blogspot.com/-EPSqYPjAPpU/Ta_-xGWCwpI/AAAAAAAAAg8/W815K7yuDGQ/s1600/Ngo.JPG"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 299px;" src="http://4.bp.blogspot.com/-EPSqYPjAPpU/Ta_-xGWCwpI/AAAAAAAAAg8/W815K7yuDGQ/s400/Ngo.JPG" alt="" id="BLOGGER_PHOTO_ID_5597972981314929298" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;Apart from the official program, I must highlight that this was the first congress I attended with some real activity on Twitter under the hashtag &lt;a href="http://twitter.com/#%21/search/%23icmr2011"&gt;#icmr2011&lt;/a&gt;. This tool made the conference a bit more exciting and helped in discussing serious and not-so-serious matters related to the multimedia retrieval community. I am pretty sure that online discussion is a practice that is here to stay, and that also helps as a personal log of the most interesting proposals seen on the slides or posters.&lt;br /&gt;&lt;br /&gt;I cannot conclude without highlighting the beautiful landscape that surrounded us in Trento. 
The town is majestically surrounded by the Alps and a graceful river offered me the chance to keep a bit fit during these too much pasta days :)&lt;br /&gt;&lt;br /&gt;&lt;a href="http://4.bp.blogspot.com/--usGTSh1pzM/Ta_-dE1eXyI/AAAAAAAAAg0/1HTwZmDP2X0/s1600/RunTrento.jpg"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 312px;" src="http://4.bp.blogspot.com/--usGTSh1pzM/Ta_-dE1eXyI/AAAAAAAAAg0/1HTwZmDP2X0/s400/RunTrento.jpg" alt="" id="BLOGGER_PHOTO_ID_5597972637312507682" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="display: block;" id="formatbar_Buttons"&gt;&lt;span class=" down" style="display: block;" id="formatbar_CreateLink" title="Enllaç"&gt;&lt;img src="http://www.blogger.com/img/blank.gif" alt="Enllaç" class="gl_link" border="0" /&gt;&lt;/span&gt;&lt;/span&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-7970319040435043355?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/7970319040435043355/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/04/reranking-at-acm-icmr-in-trento.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/7970319040435043355'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/7970319040435043355'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/04/reranking-at-acm-icmr-in-trento.html' title='Reranking at the ACM ICMR in Trento'/><author><name>Xavi Giró-i-Nieto</name><uri>https://profiles.google.com/110271653716466055491</uri><email>noreply@blogger.com</email><gd:image 
rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh5.googleusercontent.com/--EaBZ1Bzzsg/AAAAAAAAAAI/AAAAAAAAAwU/4J6xIhZ3Xyo/s512-c/photo.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://1.bp.blogspot.com/-j4SJ1Hb4Qxc/Ta_7xhAlMHI/AAAAAAAAAgs/Bp8H8h8OKVY/s72-c/2011-giro-icmr.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-842971277491112599</id><published>2011-04-11T21:57:00.004+02:00</published><updated>2011-04-11T22:43:57.858+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Marcel'/><category scheme='http://www.blogger.com/atom/ns#' term='user interface'/><title type='text'>Web algorithm demonstrator architecture</title><content type='html'>Apart from the demonstration, this week I've been preparing the  interface of the web page. It wasn't an easy task. We can talk about two  kind of interfaces, the first one, the programmer interface, whose name  is “GPIapp”, and is a class which allows programmers to do the most  common functionality such as change the title of the demonstration, or  set a little description about this algorithm in an easy way. 
&lt;p&gt;&lt;a href="http://3.bp.blogspot.com/-fdRp_-1G7IA/TaNnc8qiiFI/AAAAAAAAADc/_-2Z4q5NkxQ/s1600/page.jpg"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 400px; height: 400px;" src="http://3.bp.blogspot.com/-fdRp_-1G7IA/TaNnc8qiiFI/AAAAAAAAADc/_-2Z4q5NkxQ/s400/page.jpg" alt="" id="BLOGGER_PHOTO_ID_5594428909142837330" border="0" /&gt;&lt;/a&gt;&lt;/p&gt;&lt;p&gt;This  interface succeeds in that all the files of the web page I've been  working with, classes GPIapp, GPIheader, GPIcontent, GPIfooter and  GPIdemo are now transparent for the programmer, because they have just  to inherit a class from GPIapp and they have already got the whole web  page in the demo, and all they put inside, is going to be placed in the  correct position.&lt;br /&gt;&lt;/p&gt;&lt;p&gt;This is very important because programmers  are able to make web pages very fast and without any knowledge of  neither HTML, CSS nor other web technology, and also they are avoided to  know too much about Wt.&lt;/p&gt;&lt;p&gt;This is the hierarchy of the new classes. As you can see, almost all the classes inherit from WContainerWidget, this is because this one is the "div" element in HTML, and I needed a framework for my web page.&lt;br /&gt;&lt;/p&gt;&lt;p&gt;&lt;a href="http://2.bp.blogspot.com/-XERBeox8nlk/TaNnZDiI2PI/AAAAAAAAADU/Y3PGkBHxVJ8/s1600/hierarchy.jpg"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 400px; height: 154px;" src="http://2.bp.blogspot.com/-XERBeox8nlk/TaNnZDiI2PI/AAAAAAAAADU/Y3PGkBHxVJ8/s400/hierarchy.jpg" alt="" id="BLOGGER_PHOTO_ID_5594428842267171058" border="0" /&gt;&lt;/a&gt;&lt;/p&gt; &lt;p&gt;The second interface, as you know is the  graphical interface that you can see when you enter at the web page.  The design of this interface is a flexible design. 
It means that it  doesn't matter what resolution or display size you have, the web page is  going to fit itself to the screen.&lt;/p&gt; &lt;p&gt;Of course the page allows programmers to put a description as long as they want, and the page is resized to fit that text.&lt;br /&gt;&lt;/p&gt; &lt;p&gt;&lt;a href="http://2.bp.blogspot.com/-rV5l0X0Ofjc/TaNii5Y5iiI/AAAAAAAAADM/3OJT2LVRwAE/s1600/Screenshot.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 400px; height: 296px;" src="http://2.bp.blogspot.com/-rV5l0X0Ofjc/TaNii5Y5iiI/AAAAAAAAADM/3OJT2LVRwAE/s400/Screenshot.png" alt="" id="BLOGGER_PHOTO_ID_5594423513784617506" border="0" /&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-842971277491112599?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/842971277491112599/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/04/web-algorithm-demonstrator-architecture.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/842971277491112599'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/842971277491112599'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/04/web-algorithm-demonstrator-architecture.html' title='Web algorithm demonstrator architecture'/><author><name>Marcel</name><uri>http://www.blogger.com/profile/06357583442533980245</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail 
xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/-fdRp_-1G7IA/TaNnc8qiiFI/AAAAAAAAADc/_-2Z4q5NkxQ/s72-c/page.jpg' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-1762955322662251703</id><published>2011-04-10T20:18:00.009+02:00</published><updated>2011-04-12T10:02:19.601+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='segmentation'/><category scheme='http://www.blogger.com/atom/ns#' term='region'/><category scheme='http://www.blogger.com/atom/ns#' term='Marcel'/><category scheme='http://www.blogger.com/atom/ns#' term='user interface'/><category scheme='http://www.blogger.com/atom/ns#' term='Binary Partition Trees'/><title type='text'>Dynamic web demo of image segmentation</title><content type='html'>After long time understanding &lt;a href="http://www.webtoolkit.eu/"&gt;Wt&lt;/a&gt;, the C++ library which I have talked about in my &lt;a href="http://bitsearch.blogspot.com/search/label/Marcel"&gt;last posts&lt;/a&gt;, I am glad to release my first web demonstration. This demonstration consists on an image and a slider. &lt;p&gt;The algorithm I want to demonstrate on this occasion is the binary partition tree creation algorithm, an imageplus software that creates a hierarchical partition binary tree with the uniform regions of the image.&lt;/p&gt; &lt;p&gt;Then, the slider at first determines the maximum number of regions the algorithm can segment. This is the 100% of segmentation regions, if the slider is moved down, the number of regions for the segmentation decreases.&lt;br /&gt;&lt;/p&gt;&lt;p&gt;In the worst situation, for instance, I had a completely uniform (plain) image, the slider just would have allowed one level, because the minimum is one region, and the maximum is also one region, because it is already uniform.&lt;/p&gt;&lt;p&gt; In most images, the diversity of pixel values will allow the definition  of several regions. 
In order to set a maximum value to the slider, the  segmentation algorithm is capable of detecting all the plain regions in  the image to compute the maximum amount of regions that the finest  possible partition of an image will include.&lt;/p&gt; &lt;p&gt;To know how it works, have a look at &lt;a href="http://bitsearch.blogspot.com/2010/04/interactive-segmentation-studying.html"&gt;this article&lt;/a&gt;&lt;br /&gt;&lt;/p&gt;&lt;p&gt;&lt;a href="http://3.bp.blogspot.com/-37ukWeRlnLI/TaH1PyUcl2I/AAAAAAAAACk/K_11OSXDXjs/s1600/Binary%2BPartition%2BTrees%2BBLOG.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 461px; height: 261px;" src="http://3.bp.blogspot.com/-37ukWeRlnLI/TaH1PyUcl2I/AAAAAAAAACk/K_11OSXDXjs/s400/Binary%2BPartition%2BTrees%2BBLOG.png" alt="" id="BLOGGER_PHOTO_ID_5594021863725242210" border="0" /&gt;&lt;/a&gt;&lt;/p&gt;&lt;p&gt;Once the leaves of the tree are computed, the demonstration displays  their associated regions by uniformly painting them with their average  color, they are called “the leaves of the tree”. 
The slider then, reflects the number of leaves the tree is going to have.&lt;/p&gt;&lt;p&gt;Here there are a few screen-shots of the demonstration.&lt;/p&gt;&lt;p&gt;&lt;a href="http://1.bp.blogspot.com/-GNK3O9W_t_o/TaNe6WXSl2I/AAAAAAAAADE/RbBYQ0LMqRU/s1600/sonic%2Boriginal.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 400px; height: 227px;" src="http://1.bp.blogspot.com/-GNK3O9W_t_o/TaNe6WXSl2I/AAAAAAAAADE/RbBYQ0LMqRU/s400/sonic%2Boriginal.png" alt="" id="BLOGGER_PHOTO_ID_5594419518652979042" border="0" /&gt;&lt;/a&gt;&lt;/p&gt;&lt;p&gt;&lt;a href="http://3.bp.blogspot.com/-Zr4DXBpUYN8/TaNexUnALyI/AAAAAAAAAC0/uuyKT6htXBg/s1600/sonic60.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 400px; height: 228px;" src="http://3.bp.blogspot.com/-Zr4DXBpUYN8/TaNexUnALyI/AAAAAAAAAC0/uuyKT6htXBg/s400/sonic60.png" alt="" id="BLOGGER_PHOTO_ID_5594419363563188002" border="0" /&gt;&lt;/a&gt;&lt;/p&gt;&lt;p&gt;&lt;a href="http://3.bp.blogspot.com/-eBRQF0SfN4c/TaNe2v6HehI/AAAAAAAAAC8/LPUpzZueSCc/s1600/sonic%2B30.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 400px; height: 226px;" src="http://3.bp.blogspot.com/-eBRQF0SfN4c/TaNe2v6HehI/AAAAAAAAAC8/LPUpzZueSCc/s400/sonic%2B30.png" alt="" id="BLOGGER_PHOTO_ID_5594419456790460946" border="0" /&gt;&lt;/a&gt;&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-1762955322662251703?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/1762955322662251703/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/04/first-web-demonstration-binary.html#comment-form' title='0 Comments'/><link rel='edit' 
type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/1762955322662251703'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/1762955322662251703'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/04/first-web-demonstration-binary.html' title='Dynamic web demo of image segmentation'/><author><name>Marcel</name><uri>http://www.blogger.com/profile/06357583442533980245</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/-37ukWeRlnLI/TaH1PyUcl2I/AAAAAAAAACk/K_11OSXDXjs/s72-c/Binary%2BPartition%2BTrees%2BBLOG.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-1659578416096608865</id><published>2011-04-06T13:17:00.006+02:00</published><updated>2011-04-08T10:43:43.574+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Martí'/><category scheme='http://www.blogger.com/atom/ns#' term='HbbTv'/><category scheme='http://www.blogger.com/atom/ns#' term='test'/><title type='text'>Speed test</title><content type='html'>&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;One of my tasks this week has been to separate the pages of &lt;/span&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt; HbbTV application that my team is developing. 
&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;It is a very big application, composed by a hook, an initial page, VOD page, EPG page, a searcher, and related videos...&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;All of these different pages are showed and hidden but they are still in the same application. &lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;I developed the way to separate them and to write only the code who make them work separately. &lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;When  you call the url of every page, you make a GET with a variable called  standalone, who allows you to chose if you want them united or  separated.  &lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;Then,  I have tested the time of load of every single page called with  standalone = true (separately) or standalone = false (united). 
&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;I have done it with &lt;/span&gt;&lt;a href="http://www.mozilla-europe.org/es/"&gt;&lt;span style="font-style: normal; text-decoration: underline; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;Mozilla Firefox &lt;/span&gt;&lt;/a&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;and with one of the three decoders we have in our company: the one that has incorporated the new HbbTV enabled television. &lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;The times of load we obtained were:&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; font-weight: bold;font-family:Arial;font-size:11pt;"  &gt;MOZILLA FIREFOX&lt;/span&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;img style="width: 491px; height: 144px;" 
src="https://lh3.googleusercontent.com/9cXsYnu1pwCv4QXrMyQq4jRTqC-OTCQYozw45525NbEoUrnjtBDAH6Rq7NYrh5iuJtsxfkSrDovKzfLIc6cfkuks3WHIy8x9eGURM4lwUDGcb_8LUas" /&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; font-weight: bold;font-family:Arial;font-size:11pt;"  &gt;TELEVISION&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; font-weight: bold;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; font-weight: bold;font-family:Arial;font-size:11pt;"  &gt;HOOK:&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;standalone = true;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;7sec, 8sec, 7sec, 7sec.&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;standalone = false;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;9sec, 9.5sec, 9sec.&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; 
text-decoration: none; vertical-align: baseline; font-weight: bold;font-family:Arial;font-size:11pt;"  &gt;INITIAL:&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;standalone = true;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;7.8sec, 10sec, 11sec, 8sec, 11sec, 7.7sec, 8.6 sec. &lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;standalone = false;  &lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;10sec, 10sec, 9.8sec, 9.7sec, 9.3sec, 9.8sec. 
&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; font-weight: bold;font-family:Arial;font-size:11pt;"  &gt;CONCLUSIONS&lt;/span&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; font-weight: bold;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;- To load JS code is not so critical, the code execution is most costly. &lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;- The hook page is faster than the initial page because contains less JavaScript code. &lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;- The television is very slow executing JS code. &lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;- Images take a long time to load. 
&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;-  This television supports window.location sentence, and it runs  applications not listed in the AIT of the broadcaster TDT stream. &lt;/span&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-1659578416096608865?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/1659578416096608865/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/04/velocity-test_06.html#comment-form' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/1659578416096608865'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/1659578416096608865'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/04/velocity-test_06.html' title='Speed test'/><author><name>Martí Alcon</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-1112338602835337384</id><published>2011-04-06T12:39:00.002+02:00</published><updated>2011-08-17T17:07:09.677+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Martí'/><category 
scheme='http://www.blogger.com/atom/ns#' term='HbbTv'/><title type='text'>My application as a widget</title><content type='html'>&lt;span style="  color: rgb(0, 0, 0); background- font-weight: normal; font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;color:transparent;" id="internal-source-marker_0.04762017565548271"   &gt;&lt;/span&gt;&lt;span style="  color: rgb(0, 0, 0); background- font-weight: normal; font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;color:transparent;"   &gt;A  widget is a module that you can insert in other applications or web  pages. I have to develop a launcher widget, to insert it where the final  user or the broadcaster wants.&lt;/span&gt;&lt;br /&gt;&lt;span style="  color: rgb(0, 0, 0); background- font-weight: normal; font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;color:transparent;"   &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="  color: rgb(0, 0, 0); background- font-weight: normal; font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;color:transparent;"   &gt;I started to program following my team’s structure, being coherent with their code structuring. &lt;/span&gt;&lt;br /&gt;&lt;span style="  color: rgb(0, 0, 0); background- font-weight: normal; font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;color:transparent;"   &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="  color: rgb(0, 0, 0); background- font-weight: normal; font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;color:transparent;"   &gt;So I finished it in two style variables, horizontal and vertical. 
&lt;/span&gt;&lt;br /&gt;&lt;span style="  color: rgb(0, 0, 0); background- font-weight: normal; font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;color:transparent;"   &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="  color: rgb(0, 0, 0); background- font-weight: normal; font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;color:transparent;"   &gt;It’s not so easy because the navigation is a little complicated to adapt to both of them.  &lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="  color: rgb(0, 0, 0); background- font-weight: normal; font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;color:transparent;"   &gt;&lt;/span&gt;&lt;img src="https://lh4.googleusercontent.com/_RufO7DIH47BzEVf0Q5NvaOm4KNig7DuU9fEd0RuxVnTiGjkijCTvnDRfsT7JxSTvS3MDo8c6vLqe9nWCrwyqhlP8Fsqp__LZrHeLJ0fVNh9mW7g0Yo" height="277px;" width="191px;" /&gt;&lt;br /&gt;&lt;span style="  color: rgb(0, 0, 0); background- font-weight: normal; font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;color:transparent;"   &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="  color: rgb(0, 0, 0); background- font-weight: normal; font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;color:transparent;"   &gt;An example of my widget (without look and feel)&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="  color: rgb(0, 0, 0); background- font-weight: normal; font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;color:transparent;"   &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="  color: rgb(0, 0, 0); background- font-weight: normal; font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:11pt;color:transparent;"   &gt;It works horizontal or vertical 
with writing the type in the configuration parameter. &lt;/span&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-1112338602835337384?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/1112338602835337384/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/04/my-application-as-widget.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/1112338602835337384'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/1112338602835337384'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/04/my-application-as-widget.html' title='My application as a widget'/><author><name>Martí Alcon</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-1210510183677738239</id><published>2011-04-04T10:09:00.011+02:00</published><updated>2011-08-17T17:06:42.572+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Martí'/><category scheme='http://www.blogger.com/atom/ns#' term='HbbTv'/><title type='text'>Types of launchers</title><content type='html'>&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; color: rgb(0, 0, 0);font-family:Arial;font-size:100%;"  &gt;As  I explained in my &lt;a href="http://bitsearch.blogspot.com/2011/03/introducing-launcher-hbbtv-application.html"&gt;last post&lt;/a&gt;, a launcher is a kind of application that  allows to start another applications being 
consistent with their life  cycle.&lt;/span&gt;&lt;span style="color: rgb(0, 0, 0);font-size:100%;" &gt;&lt;br /&gt;&lt;/span&gt;&lt;span style="color: rgb(0, 0, 0);font-size:100%;" &gt;&lt;br /&gt;&lt;/span&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; color: rgb(0, 0, 0);font-family:Arial;font-size:100%;"  &gt;Nowadays, most of the digital systems for video or applications and even the computers contain launchers.   &lt;/span&gt;&lt;span style="color: rgb(0, 0, 0);font-size:100%;" &gt;&lt;br /&gt;&lt;/span&gt;&lt;span style="color: rgb(0, 0, 0);font-size:100%;" &gt;&lt;br /&gt;&lt;/span&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; color: rgb(0, 0, 0);font-family:Arial;font-size:100%;"  &gt;I tried to classify these in groups:&lt;/span&gt;&lt;span style="color: rgb(0, 0, 0);font-size:100%;" &gt;&lt;br /&gt;&lt;/span&gt;&lt;span style="color: rgb(0, 0, 0);font-size:100%;" &gt;&lt;br /&gt;&lt;/span&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; color: rgb(0, 0, 0);font-family:Arial;font-size:100%;"  &gt;- Mobile phones launchers: &lt;/span&gt;&lt;span style="color: rgb(0, 0, 0);font-size:100%;" &gt;&lt;br /&gt;&lt;/span&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; color: rgb(0, 0, 0);font-family:Arial;font-size:100%;"  &gt;&lt;br /&gt;The format of these applications is very usable and reduced. Mobiles  are often smaller than other systems and they have lots of apps. 
&lt;/span&gt;&lt;span style="color: rgb(0, 0, 0);font-size:100%;" &gt;&lt;br /&gt;&lt;br /&gt;&lt;/span&gt;&lt;div style="text-align: center; color: rgb(0, 0, 0);"&gt;&lt;span style="font-size:100%;"&gt;&lt;img src="https://lh3.googleusercontent.com/ab7ly5sJFkh_p6I3mHsBiXUBGburTS_h7ZX-4hMR6T-mbGHp4_RkJ_uot5_RVWeflEVH0kmzkpvMA-4zbZ5tECViMItGh8Onm6bnMr3Rev7MCa1ELdw" height="229px;" width="283px;" /&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style="color: rgb(0, 0, 0);font-size:100%;" &gt;&lt;br /&gt;&lt;/span&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; color: rgb(0, 0, 0);font-family:Arial;font-size:100%;"  &gt;- Computer launchers:&lt;/span&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; color: rgb(0, 0, 0);font-family:Arial;font-size:100%;"  &gt;&lt;br /&gt;&lt;br /&gt;I believe that desktops and other similar widgets can be understood  like launcher applications. There are a lot of formats like  dock (Mac OS) or the Initial button on other operative systems.&lt;/span&gt;&lt;span style="color: rgb(0, 0, 0);font-size:100%;" &gt;&lt;br /&gt;&lt;br /&gt;&lt;/span&gt;&lt;span style="color: rgb(0, 0, 0);font-size:100%;" &gt;&lt;br /&gt;&lt;/span&gt;&lt;div style="text-align: center; color: rgb(0, 0, 0);"&gt;&lt;span style="font-size:100%;"&gt;&lt;img src="https://lh6.googleusercontent.com/CAn9PlN8Qz3M2joJpejXExqt4FTQkieOL9b2acLVN7FF3K62rLfaanVNRjMuDDAzXufzHZCjnu7bNgbzJfDZVBxSQoPku34mtdr6jPzsmS8wlDQy6PE" height="177px;" width="241px;" /&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style="color: rgb(0, 0, 0);font-size:100%;" &gt;&lt;br /&gt;&lt;/span&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; color: rgb(0, 0, 0);font-family:Arial;font-size:100%;"  &gt;- Television launchers: &lt;/span&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; color: rgb(0, 0, 0);font-family:Arial;font-size:100%;" 
 &gt;&lt;br /&gt;&lt;br /&gt;With  the new technologies of televisions, the launcher is very important.  Televisions are capable to do a lot of things so is a good idea  to organise them into an application to manage them easily.    &lt;/span&gt;&lt;span style="color: rgb(0, 0, 0);font-size:100%;" &gt;&lt;br /&gt;&lt;br /&gt;&lt;/span&gt;&lt;div style="text-align: center; color: rgb(0, 0, 0);"&gt;&lt;span style="font-size:100%;"&gt;&lt;img src="https://lh5.googleusercontent.com/7Oj7ZPfAsqzBGls2B-YTTDZmZJHwNUvW4NxHix2vNefuozU_KSOyzvQNxdr_oA7Goq3_djk1lQ6d3NgOHlDSL-ANggIKwZwDGARYU0zRolN70x9wrTU" height="220px;" width="375px;" /&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style="color: rgb(0, 0, 0);font-size:100%;" &gt;&lt;br /&gt;&lt;/span&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; color: rgb(0, 0, 0);font-family:Arial;font-size:100%;"  &gt;- Set-top boxes launchers:&lt;/span&gt;&lt;span style="color: rgb(0, 0, 0);font-size:100%;" &gt;&lt;br /&gt;&lt;/span&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; color: rgb(0, 0, 0);font-family:Arial;font-size:100%;"  &gt;&lt;br /&gt;Set-top boxes have already a launcher. This point is very important to develop my project. 
&lt;/span&gt;&lt;span style="color: rgb(0, 0, 0);font-size:100%;" &gt;&lt;br /&gt;&lt;br /&gt;&lt;/span&gt;&lt;div style="text-align: center; color: rgb(0, 0, 0);"&gt;&lt;span style="font-size:100%;"&gt;&lt;img src="https://lh3.googleusercontent.com/1kUaw7e5MIY5YKUSUHvg5KwZwk5wnutwGX0H84tOySbbLxFIrYgnqIeu9IXsf6VaSnTnbBEP-Vv-fo-kh90hkfG1ajv5gVxNsRa5v_-3vdRLFvtgz3E" height="201px;" width="233px;" /&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style="color: rgb(0, 0, 0);font-size:100%;" &gt;&lt;br /&gt;&lt;/span&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; color: rgb(0, 0, 0);font-family:Arial;font-size:100%;"  &gt;- Console launchers:&lt;/span&gt;&lt;span style="color: rgb(0, 0, 0);font-size:100%;" &gt;&lt;br /&gt;&lt;/span&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; color: rgb(0, 0, 0);font-family:Arial;font-size:100%;"  &gt;&lt;br /&gt;The video game environments have also applications to start games, networks and other things.
&lt;br /&gt;&lt;/span&gt;&lt;span style="color: rgb(0, 0, 0);font-size:100%;" &gt;&lt;br /&gt;&lt;/span&gt;&lt;div style="text-align: center; color: rgb(0, 0, 0);"&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline;font-family:Arial;font-size:100%;"  &gt;&lt;/span&gt;&lt;img src="https://lh3.googleusercontent.com/lnj456lSyf33EsXTF8_Al1ck_HVQEyNJnloYPCO6VEChghlsH8lotzX4Hmop2SjHwKMRV00A-5giiKv0bnVqjWomcqJ_5KfSOh9IswL6VbpGnlBcNYc" height="194px;" width="259px;" /&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-1210510183677738239?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/1210510183677738239/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/04/types-of-launchers.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/1210510183677738239'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/1210510183677738239'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/04/types-of-launchers.html' title='Types of launchers'/><author><name>Martí Alcon</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-8652883506852331486</id><published>2011-04-02T13:25:00.008+02:00</published><updated>2011-04-06T17:00:08.142+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Marcel'/><category scheme='http://www.blogger.com/atom/ns#' term='user 
interface'/><title type='text'>Integrating Wt + Imageplus</title><content type='html'>These last weeks I've been getting started with a C++ library whose purpose is to build web applications. This library is called &lt;a href="http://www.webtoolkit.eu/wt"&gt;Wt&lt;/a&gt;, you can have a look at &lt;a href="http://bitsearch.blogspot.com/search/label/Marcel"&gt;my last articles&lt;/a&gt; to know more about it. &lt;p&gt;My project consists of demonstrating algorithms from the &lt;a href="http://gps-tsc.upc.es/imatge/"&gt;UPC video and image processing group&lt;/a&gt;, so an important step in the development of my application would be to be able to use imageplus algorithms, the library of the group, and create Wt classes in order to show them in a web page.&lt;/p&gt; &lt;p&gt;I have to thank Albert Gil for helping me with the libraries, because the first step was to change the way of compiling the project, in Wt, actually they are using a program called &lt;a href="http://www.cmake.org/"&gt;Cmake&lt;/a&gt;, as well as &lt;a href="http://www.gnu.org/software/make/manual/make.html"&gt;make&lt;/a&gt;.&lt;/p&gt;&lt;p&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" 
href="data:image/jpg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAkGBhAQDw8PDQ8ODw0PDg8PDA0PDA4PDQ0NFRAVFBQQEhIXHiYeFxkjGRISHy8gIzMtLi0sFR4xNTMtOTIuLS0BCQoKDgwOFQ8PFykfHBgpKSkpKSkpKSwpLCkpKikpKSwsLSkpKSksKSksLCkpKSopKSkpKSksKSkpKSkpNikpKf/AABEIAOEA4QMBIgACEQEDEQH/xAAbAAEAAQUBAAAAAAAAAAAAAAAAAQIDBAUHBv/EAEkQAAICAQICBwQDCgoLAAAAAAABAgMEERIFIQYTMUFRYZEHIjJxUoHSFEJTgpKhscHR0xYjJDNUVWJylKQIGCZEg5Oio7PC8f/EABkBAQEBAQEBAAAAAAAAAAAAAAABAgMEBf/EACQRAQEAAgICAQMFAAAAAAAAAAABAhEDIRIxUUFxgRMiMmHw/9oADAMBAAIRAxEAPwDiTk/F+rG5+L9WQwBO5+L9WNz8X6sgATufi/Vjc/F+pAAnc/F+rG5+L9WQAJ3Pxfqxufi/VkACdz8X6sbn4v1ZAAnc/F+rG5+L9WQAJ3Pxfqxufi/UgATufi/Vjc/F+rIAE7n4v1Y3PxfqyABO5+L9WNz8X6sgATufi/Vjc/F+rIAE7n4v1Y3PxfqyABO5+L9WQ5PxfqwQwPUbn4v1BAA8ywGAAAN44XIAAd5wye2dgCRO06zCfB2gFWwbC+JqqQTtIaJcJ8HYADleGX0bAAcMuO4rsABhQAAAAAAAAAACGSQwPTgADzLAYPRx8X1qbBoSol6unU9MhJtaUC7GkyYUF5Vja9RixxytY5k7SdB2bY/UIdQjI2jaDbFeOiiWP4GZtKXEG2BKgtOBsnAszqKvVYOgL86iy4k0zZpAAPNnxfWGwAHnUAAAAAAAAIZJDA9OAAPNMmMQlzMimo+jEkTVSZUKyYQL8Kx7W1RGsuKkyascyoYnkakRruoHUm2WGUTxS6Rq+rHVmdKgt9SXQxHWUuBmSqLbrJoYjgUSiZcqy1KBNKxZQMeyozZRLcomWpWulEpMq6oxpIVmxAAPLy4a7hAAHBQAAAAAIZJDA9OAANBXDmZtcNCmmlP4X9T5Mv7Gu1H0Sqq4mdjVGNRE22HUakRlYuFqbSjhjfcX+FYmrR7bhXAtyXIZZSI8PPhbXcYt2F5HReJcFUFzR5LiekNdPUTKUeZuxtO3kYNlkI9r9OZk59rbZp7kbGQ8qtv4tPmmVbU/hafyeprJIp1aeqbT8V2hdNhZAsSiU15vdP8AK7/rL7jy17n2eZKjFlEtSiZMkWpIxVY84GDbA2comLkQJFjABVNFJnKb6Z9AAPDZrpoABAAAAhkkMD04AA1tcTIhY15rwLUEXEj6MRnY+19vJ/mNvh0965ryNHSzbYM3qtNdW9El2t+COkR63hD00Pf8Fz1GPmeEw+F5cYqc8TKUNNd7xrVy8ddDZw4koQ5Pn2fWc85Mh6DjnEYtNJ/N+Jz7i9+rZm53FdV2misc7ZbK4Tsm+yEISnN/KK5kxx0NXlM1lxuuKcHyqY778XJqh9OzGthD8prQ0VkjrKMingWTZRPJrx7Z41e7rb4wbqr2pOW6XdomvU1zOtdEn/snxb+9l/8AhqORRZnHLds+GlXUya1UZNLtai2l9ZFGTtej+F/mfiey6N9PsjD4Zl4VWE76b/uh2ZG61KrrKIweu2LXJR15vvOf7iy227WTbcyRbaJ4bXZZW3Guyar5SlGucoxWmvNpaLl+gz8DgeTkJvFxsi9L4pU0W2RT8G4prUl6Zaxox7Ymdl4s6puu6udVkfirshKE4/OMuaMSxGFa21Fo2WNwm/Im4YtF1812wppnbJLzUU9CriPRfOx47snCy6IfTtxbq4flNaC2Jk1YAPHyz9xAG0wOiufkQ6zGwcy6vusqxbpwfyklozAy8OymbrvrsqsXxV21yrmvnGSTRzVaAAAh
kkMD04AAxcRw3LrFKUNXuUJKM3y5aNppc9O42HWYn4HK/wATT+7MLh1jVkXG3qGm9LdZrZyffBOXPs5eJu1mW/1p/wBzO+wfRRi12Yv4LK/xNX7s6jwf7l4Nwuvijpc8zM2rFjdOMnXGabilJJaLZFzenN6panPI5tn9Z6/8TO+wdi4/0rsw+D8Py8eOPkQlDGhZO6Ns4pSo5SWnNNyjpz8THLfU+R4jD9rPEZWqXWxlHXV1Sro6trw1jFSXqb72iRqnjYfFKouCydsb4RaWsp1ucZPlzktkot9/IwMT2vZE3p9zcP8AlGq/X87NZ0w6eZGbTHGshj0wjZC1SrV6lqlJad6095mfG+UsmhqKr4XTrqhC2Vls4VwXWwWs5SUV974tHQOk/SOno9TTiYNUZ5d0HZbdP4tqe1WT0+JtqWi7Eov6/A9E85wz8FzzFsWXRug5ZGkk7EtPh07+82/tthZHidcnd1ddmJV1ert092dil8Ka7WvVGs+8pjfQjh/tyza5fyiqvJqfxQajVPT+zKK09UzxXFuJY12VkXQpuhXbfbZXBWVQ2QlNyUdqi0tE9NEI2y05Z8V5a5f2ArZ93EF9Usv7B0xwmN3B0jotKv8AgrxXZGxQ1y9VKcXNvqauxqKX5jk1U6O+F/1XVr/0OvdGrJPotxRvI6x65Wl2tvu/xVXL3lu/+nKMfIn3Z238bJ/VEzx+8vurqXQGVf8ABrjeyNihpm7lKyLk/wCRQ10aikuXkcbjPH767/qvr+wdn6C2yfRzjTeR1jUczS3W17P5FD6ST8+RyCrJs7s/b+NlfqiXj/ll91jr3sVvojwvi0pV2SojKcr6pTjKVlaxtZRWij2xTRpcj245ScYYWNjYuNDRVUKvfpBdib5JfipG29lVknwbjTlf1z2XaT1te3+SP6ST9DltEn+H0+u39hymMueW2a6/xnIp4/wK3OlSq83BVspbHzTrip2QjJrXZOtppPsenhqcZhKqTS22LVpa9ZDRavt+E7B7Om6uA8Yuts3VyWTtm3PT3cRRemq17WlyOO1Wy00+6dOWj53fsLx9WxXavaHx6zo9h4eLwqhV02KxWZWyM5dZHb8TacXZLVtuWvZy8vGcH9v+fVJfdUIZdLfvxlGuqzb37ZVxS9UzM4B7b7aKY43EK6M+qMVBWbrYXSguSVilBxsfny179e09Jwi3o/x9zx6KJ4eaqnNOqP3PNJNJzi4PZPRyXKS7+w5WeM/dPyjhOZkUzssmq7Up2TmkrILRSk2l8PmdB9j/AERxMmeTxDNrk8Lh8OslC1xnXZYoym3KOi3RhGOune3Hu1R43jmDbi5WRi2Zfv491lUnrd722TW7s79NfrOw+xLOkuD8T6mUMnKqutshXLfKM5PFj1cJJ6NpyrkvUnJ/vRHiOOe3filt0nh2V4mMnpTTGiiyWxdm+U09Xp4aI9Zi8QXSbgeY8uqpcU4dGU6r647XJ7HOOn0VPZOLj2apPw087/rAZf8AV3DP+Tb9oiz/AEgcxwsrjhcPhGyEoy2Qui9Gmu6fmcFcuTKigrAEMkhgenAAGFiXKMlJxjNLX3J7tr5d+1p/nNiuJQ/ouN/mf3hpqZmVBn0StiuIw/ouN/mf3h7vob7R8eGPLh3FceMuHzTjB1xnNVRlLc4yi25OOvvJx5p9ndpzZGXj4spc9NF3yb0XqxcZlNVHVsfg3RtSVlXEVGPaq3Z7y17veju9eZrOnHE+Fqmujh1XWTVqnbfOFmk4qEo7dzak+ctdNNDw8KoLv3Py5R9XzNxViRup3R+KPavNftRP07O90aPIzormsejVc09b00/FaTOiUdPOE8Xxqsfj8XRlU69XlRU1W20k5xnHXZqktYy93kvLTnedjmnnHnoaywmX4I61DA6KYq6y3LWY1zjTB2WbvJwh2/jNLxOd8X43TdlZFtOLTGmy+2dUJRsjKNcptxTjGe1PRrkuXgaV1kpaDHDXe9q6ZwHpnhVdH+IYds66
sq55HU4yryHGzdXWo+9zS1cWu1dhzinNj+Apfz677ZZnzLTehrHHVv8Aaun9EemuDRwPiuLfOqnJyFldRjqvIkrd2LGEdZc0tZJrtXYcxhnRX+70P59f9sx5vUttiYzG2/JendPYnk0vhXFpXVRjQpTd8Kt+s6lje+lrLXXbqu1GLjcA6Kz0tjxKyFb5uiyxwmv7PvQ3fp+ZrvZnx/FxOEcUoyr4VX5MLeorkp7p7sZwSWi8Xoc6hE83jbll3pl0Tp/7RMWWLDhXB6tvD4adbZKNkFdpLcoQTalpu95ylzk/r157iZcYThOVVbUZwlJe/q0pJtfF5FEqyxc+R0xx11FdZysPolxDW6vIfDrpPdOuUZ1QUnzf8XJOC/EehXw3jPR3gPWZGFfPiOfKuVdcYa7FF6PTdptgm1HV83p2I4rMpOeXHfW+kZ3FOMyyL7si6uuVt9s7bH/GJb5ycnot3Jcz0Hs+9ok+E5TtjTGWPalDKpi5KU4J6xlFttKUdXp82u/VeQaDR58r3pXZ+JcM6J8Tm8qOdPh91j33VfzUd75tuE4uKf8AcehhZ38FuHY2RHFst4jnWY91VFkou1U2TrlGM1yjWtG09VrJacjkmhGhgEVlKRUAIZJDA9OAANJjtt6Jam0qx9Pjaj5ffehq1nNcoJQXl2+pcqvb7XzPonttlkQj8EdX9KXP83Z+kpllyk+bf7PkYsZFWpZUZcLTZcJ4hKuxbU5KXuziu1ru08zTV6tpLtZnu1VLauc2tJPwX0V+tm9o3nEsRSW+vRxfhzSZ5fNxmnqjMw+Jzrb0esX8UH8L/YzOs6q1apqEn95Llz8mB5fcUuw2HEOEyXNLT9DNRbGUeUk0WNxXK0tSkUORMK5S+FN/o9SruRTKRl4GHq1OXZ96vF+PyLuPw5LnPm/o931+JmNmLWLdomi3poVSkWpzMCmyRg32F6+0w5yEX0pZABzzy8ZtkAB4mgAAAAAIZJDA9OAAPNMrhMtsH0JWZdM+q4yYPXku01cJGzrtVS587H/0r9pWvbOU1UuX84+1/R8l5mPv1MdXa9pWpDaMivm0XLbO7wLNUtE36FO4uxkQyZLslJLw1ehFlrl8Wj+aRY3E7hsOoh27Y/kor1Le4OQ2K3IpcihzLU7SCuczGtuKLLzGlMNekzmWwCWsW7AAeLPPyqgAMKAAAAABDJIYHpwAB5lgMuQe3m+b7l4ebOmGdx+xpXB7Of37+FfR835lp2PXV9pS3qD1Y5zL0i9C0v139xhFcJ6c/Ds+Zva+TZTv7k+z9IVpresJ60HTZdYOsNf1w64aOme7SiV5gu0hzB0yZ5BZnaWnIgbPL4S5EAGMs5PbPsAB5c87koADmoAAAAAAAAQySGB6cAAebXLm+3uX62Ut6832hgAAAAYB1nLlE0AA6Tmn1hoABf1sU0AAl5p9IugAHO8uVNAAOSgAAAAAAAAAAAAAQySGB6cAAeZYDAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAhkkMD04AA8ywGyNQJBGo1AkEajUCQRqNQJBGo1AkEajUCQRqNQJBGo1AkEajUCQRqNQJBGo1AkEajUCQRqNQJIY1DYHpwNQBZYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEMkAegAAH/9k="&gt;&lt;img style="float: right; margin: 0pt 0pt 10px 10px; cursor: pointer; width: 196px; height: 196px;" 
src="data:image/jpg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAkGBhAQDw8PDQ8ODw0PDg8PDA0PDA4PDQ0NFRAVFBQQEhIXHiYeFxkjGRISHy8gIzMtLi0sFR4xNTMtOTIuLS0BCQoKDgwOFQ8PFykfHBgpKSkpKSkpKSwpLCkpKikpKSwsLSkpKSksKSksLCkpKSopKSkpKSksKSkpKSkpNikpKf/AABEIAOEA4QMBIgACEQEDEQH/xAAbAAEAAQUBAAAAAAAAAAAAAAAAAQIDBAUHBv/EAEkQAAICAQICBwQDCgoLAAAAAAABAgMEERIFIQYTMUFRYZEHIjJxUoHSFEJTgpKhscHR0xYjJDNUVWJylKQIGCZEg5Oio7PC8f/EABkBAQEBAQEBAAAAAAAAAAAAAAABAgMEBf/EACQRAQEAAgICAQMFAAAAAAAAAAABAhEDIRIxUUFxgRMiMmHw/9oADAMBAAIRAxEAPwDiTk/F+rG5+L9WQwBO5+L9WNz8X6sgATufi/Vjc/F+pAAnc/F+rG5+L9WQAJ3Pxfqxufi/VkACdz8X6sbn4v1ZAAnc/F+rG5+L9WQAJ3Pxfqxufi/UgATufi/Vjc/F+rIAE7n4v1Y3PxfqyABO5+L9WNz8X6sgATufi/Vjc/F+rIAE7n4v1Y3PxfqyABO5+L9WQ5PxfqwQwPUbn4v1BAA8ywGAAAN44XIAAd5wye2dgCRO06zCfB2gFWwbC+JqqQTtIaJcJ8HYADleGX0bAAcMuO4rsABhQAAAAAAAAAACGSQwPTgADzLAYPRx8X1qbBoSol6unU9MhJtaUC7GkyYUF5Vja9RixxytY5k7SdB2bY/UIdQjI2jaDbFeOiiWP4GZtKXEG2BKgtOBsnAszqKvVYOgL86iy4k0zZpAAPNnxfWGwAHnUAAAAAAAAIZJDA9OAAPNMmMQlzMimo+jEkTVSZUKyYQL8Kx7W1RGsuKkyascyoYnkakRruoHUm2WGUTxS6Rq+rHVmdKgt9SXQxHWUuBmSqLbrJoYjgUSiZcqy1KBNKxZQMeyozZRLcomWpWulEpMq6oxpIVmxAAPLy4a7hAAHBQAAAAAIZJDA9OAANBXDmZtcNCmmlP4X9T5Mv7Gu1H0Sqq4mdjVGNRE22HUakRlYuFqbSjhjfcX+FYmrR7bhXAtyXIZZSI8PPhbXcYt2F5HReJcFUFzR5LiekNdPUTKUeZuxtO3kYNlkI9r9OZk59rbZp7kbGQ8qtv4tPmmVbU/hafyeprJIp1aeqbT8V2hdNhZAsSiU15vdP8AK7/rL7jy17n2eZKjFlEtSiZMkWpIxVY84GDbA2comLkQJFjABVNFJnKb6Z9AAPDZrpoABAAAAhkkMD04AA1tcTIhY15rwLUEXEj6MRnY+19vJ/mNvh0965ryNHSzbYM3qtNdW9El2t+COkR63hD00Pf8Fz1GPmeEw+F5cYqc8TKUNNd7xrVy8ddDZw4koQ5Pn2fWc85Mh6DjnEYtNJ/N+Jz7i9+rZm53FdV2misc7ZbK4Tsm+yEISnN/KK5kxx0NXlM1lxuuKcHyqY778XJqh9OzGthD8prQ0VkjrKMingWTZRPJrx7Z41e7rb4wbqr2pOW6XdomvU1zOtdEn/snxb+9l/8AhqORRZnHLds+GlXUya1UZNLtai2l9ZFGTtej+F/mfiey6N9PsjD4Zl4VWE76b/uh2ZG61KrrKIweu2LXJR15vvOf7iy227WTbcyRbaJ4bXZZW3Guyar5SlGucoxWmvNpaLl+gz8DgeTkJvFxsi9L4pU0W2RT8G4prUl6Zaxox7Ymdl4s6puu6udVkfirshKE4/OMuaMSxGFa21Fo2WNwm/Im4YtF1812wppnbJLzUU9CriPRfOx47snCy6IfTtxbq4flNaC2Jk1YAPHyz9xAG0wOiufkQ6zGwcy6vusqxbpwfyklozAy8OymbrvrsqsXxV21yrmvnGSTRzVaAAAhk
kMD04AAxcRw3LrFKUNXuUJKM3y5aNppc9O42HWYn4HK/wATT+7MLh1jVkXG3qGm9LdZrZyffBOXPs5eJu1mW/1p/wBzO+wfRRi12Yv4LK/xNX7s6jwf7l4Nwuvijpc8zM2rFjdOMnXGabilJJaLZFzenN6panPI5tn9Z6/8TO+wdi4/0rsw+D8Py8eOPkQlDGhZO6Ns4pSo5SWnNNyjpz8THLfU+R4jD9rPEZWqXWxlHXV1Sro6trw1jFSXqb72iRqnjYfFKouCydsb4RaWsp1ucZPlzktkot9/IwMT2vZE3p9zcP8AlGq/X87NZ0w6eZGbTHGshj0wjZC1SrV6lqlJad6095mfG+UsmhqKr4XTrqhC2Vls4VwXWwWs5SUV974tHQOk/SOno9TTiYNUZ5d0HZbdP4tqe1WT0+JtqWi7Eov6/A9E85wz8FzzFsWXRug5ZGkk7EtPh07+82/tthZHidcnd1ddmJV1ert092dil8Ka7WvVGs+8pjfQjh/tyza5fyiqvJqfxQajVPT+zKK09UzxXFuJY12VkXQpuhXbfbZXBWVQ2QlNyUdqi0tE9NEI2y05Z8V5a5f2ArZ93EF9Usv7B0xwmN3B0jotKv8AgrxXZGxQ1y9VKcXNvqauxqKX5jk1U6O+F/1XVr/0OvdGrJPotxRvI6x65Wl2tvu/xVXL3lu/+nKMfIn3Z238bJ/VEzx+8vurqXQGVf8ABrjeyNihpm7lKyLk/wCRQ10aikuXkcbjPH767/qvr+wdn6C2yfRzjTeR1jUczS3W17P5FD6ST8+RyCrJs7s/b+NlfqiXj/ll91jr3sVvojwvi0pV2SojKcr6pTjKVlaxtZRWij2xTRpcj245ScYYWNjYuNDRVUKvfpBdib5JfipG29lVknwbjTlf1z2XaT1te3+SP6ST9DltEn+H0+u39hymMueW2a6/xnIp4/wK3OlSq83BVspbHzTrip2QjJrXZOtppPsenhqcZhKqTS22LVpa9ZDRavt+E7B7Om6uA8Yuts3VyWTtm3PT3cRRemq17WlyOO1Wy00+6dOWj53fsLx9WxXavaHx6zo9h4eLwqhV02KxWZWyM5dZHb8TacXZLVtuWvZy8vGcH9v+fVJfdUIZdLfvxlGuqzb37ZVxS9UzM4B7b7aKY43EK6M+qMVBWbrYXSguSVilBxsfny179e09Jwi3o/x9zx6KJ4eaqnNOqP3PNJNJzi4PZPRyXKS7+w5WeM/dPyjhOZkUzssmq7Up2TmkrILRSk2l8PmdB9j/AERxMmeTxDNrk8Lh8OslC1xnXZYoym3KOi3RhGOune3Hu1R43jmDbi5WRi2Zfv491lUnrd722TW7s79NfrOw+xLOkuD8T6mUMnKqutshXLfKM5PFj1cJJ6NpyrkvUnJ/vRHiOOe3filt0nh2V4mMnpTTGiiyWxdm+U09Xp4aI9Zi8QXSbgeY8uqpcU4dGU6r647XJ7HOOn0VPZOLj2apPw087/rAZf8AV3DP+Tb9oiz/AEgcxwsrjhcPhGyEoy2Qui9Gmu6fmcFcuTKigrAEMkhgenAAGFiXKMlJxjNLX3J7tr5d+1p/nNiuJQ/ouN/mf3hpqZmVBn0StiuIw/ouN/mf3h7vob7R8eGPLh3FceMuHzTjB1xnNVRlLc4yi25OOvvJx5p9ndpzZGXj4spc9NF3yb0XqxcZlNVHVsfg3RtSVlXEVGPaq3Z7y17veju9eZrOnHE+Fqmujh1XWTVqnbfOFmk4qEo7dzak+ctdNNDw8KoLv3Py5R9XzNxViRup3R+KPavNftRP07O90aPIzormsejVc09b00/FaTOiUdPOE8Xxqsfj8XRlU69XlRU1W20k5xnHXZqktYy93kvLTnedjmnnHnoaywmX4I61DA6KYq6y3LWY1zjTB2WbvJwh2/jNLxOd8X43TdlZFtOLTGmy+2dUJRsjKNcptxTjGe1PRrkuXgaV1kpaDHDXe9q6ZwHpnhVdH+IYds66s
q55HU4yryHGzdXWo+9zS1cWu1dhzinNj+Apfz677ZZnzLTehrHHVv8Aaun9EemuDRwPiuLfOqnJyFldRjqvIkrd2LGEdZc0tZJrtXYcxhnRX+70P59f9sx5vUttiYzG2/JendPYnk0vhXFpXVRjQpTd8Kt+s6lje+lrLXXbqu1GLjcA6Kz0tjxKyFb5uiyxwmv7PvQ3fp+ZrvZnx/FxOEcUoyr4VX5MLeorkp7p7sZwSWi8Xoc6hE83jbll3pl0Tp/7RMWWLDhXB6tvD4adbZKNkFdpLcoQTalpu95ylzk/r157iZcYThOVVbUZwlJe/q0pJtfF5FEqyxc+R0xx11FdZysPolxDW6vIfDrpPdOuUZ1QUnzf8XJOC/EehXw3jPR3gPWZGFfPiOfKuVdcYa7FF6PTdptgm1HV83p2I4rMpOeXHfW+kZ3FOMyyL7si6uuVt9s7bH/GJb5ycnot3Jcz0Hs+9ok+E5TtjTGWPalDKpi5KU4J6xlFttKUdXp82u/VeQaDR58r3pXZ+JcM6J8Tm8qOdPh91j33VfzUd75tuE4uKf8AcehhZ38FuHY2RHFst4jnWY91VFkou1U2TrlGM1yjWtG09VrJacjkmhGhgEVlKRUAIZJDA9OAANJjtt6Jam0qx9Pjaj5ffehq1nNcoJQXl2+pcqvb7XzPonttlkQj8EdX9KXP83Z+kpllyk+bf7PkYsZFWpZUZcLTZcJ4hKuxbU5KXuziu1ru08zTV6tpLtZnu1VLauc2tJPwX0V+tm9o3nEsRSW+vRxfhzSZ5fNxmnqjMw+Jzrb0esX8UH8L/YzOs6q1apqEn95Llz8mB5fcUuw2HEOEyXNLT9DNRbGUeUk0WNxXK0tSkUORMK5S+FN/o9SruRTKRl4GHq1OXZ96vF+PyLuPw5LnPm/o931+JmNmLWLdomi3poVSkWpzMCmyRg32F6+0w5yEX0pZABzzy8ZtkAB4mgAAAAAIZJDA9OAAPNMrhMtsH0JWZdM+q4yYPXku01cJGzrtVS587H/0r9pWvbOU1UuX84+1/R8l5mPv1MdXa9pWpDaMivm0XLbO7wLNUtE36FO4uxkQyZLslJLw1ehFlrl8Wj+aRY3E7hsOoh27Y/kor1Le4OQ2K3IpcihzLU7SCuczGtuKLLzGlMNekzmWwCWsW7AAeLPPyqgAMKAAAAABDJIYHpwAB5lgMuQe3m+b7l4ebOmGdx+xpXB7Of37+FfR835lp2PXV9pS3qD1Y5zL0i9C0v139xhFcJ6c/Ds+Zva+TZTv7k+z9IVpresJ60HTZdYOsNf1w64aOme7SiV5gu0hzB0yZ5BZnaWnIgbPL4S5EAGMs5PbPsAB5c87koADmoAAAAAAAAQySGB6cAAebXLm+3uX62Ut6832hgAAAAYB1nLlE0AA6Tmn1hoABf1sU0AAl5p9IugAHO8uVNAAOSgAAAAAAAAAAAAAQySGB6cAAeZYDAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAhkkMD04AA8ywGyNQJBGo1AkEajUCQRqNQJBGo1AkEajUCQRqNQJBGo1AkEajUCQRqNQJBGo1AkEajUCQRqNQJIY1DYHpwNQBZYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEMkAegAAH/9k=" alt="" border="0" /&gt;&lt;/a&gt;&lt;/p&gt; &lt;p&gt;Imageplus,  is currently using a program called &lt;a href="http://www.scons.org/"&gt;scons&lt;/a&gt; to compile.&lt;/p&gt; &lt;p&gt;We changed this and landed our workspace to the IDE that they are using, &lt;a href="http://www.eclipse.org/"&gt;Eclipse.&lt;/a&gt;&lt;/p&gt; 
&lt;p&gt;Furthermore, we had to add two libraries from Wt in order to join them, the name of the libraries is&lt;span style="font-style: italic;"&gt; libwt&lt;/span&gt; and &lt;span style="font-style: italic;"&gt;libwthttp&lt;/span&gt;.&lt;/p&gt; &lt;p&gt;Once the environment was ready, I started to work, and I did some examples that I'm going to explain right now:  &lt;/p&gt;  &lt;p style="margin-bottom: 0cm;"&gt;The first one was an easy one, we just wanted to test if it was possible to request some data, process it with imageplus and in the end, show the result in the client's browser.&lt;/p&gt;  &lt;p style="margin-bottom: 0cm;"&gt;And the algorithm I used was the &lt;a href="http://hyperphysics.phy-astr.gsu.edu/hbase/vsca.html"&gt;scalar product&lt;/a&gt; between two vectors, which is a data input and data output algorithm, no images, no videos.&lt;/p&gt;  &lt;p style="margin-bottom: 0cm;"&gt;Well, you can see in this figure the result, it worked fine.&lt;/p&gt;&lt;p style="margin-bottom: 0cm;"&gt;&lt;br /&gt;&lt;/p&gt;&lt;a href="http://4.bp.blogspot.com/-9_BdmMnrXx4/TZcS6YPxqeI/AAAAAAAAACU/eBaEGCKv-bw/s1600/pev.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 429px; height: 79px;" src="http://4.bp.blogspot.com/-9_BdmMnrXx4/TZcS6YPxqeI/AAAAAAAAACU/eBaEGCKv-bw/s320/pev.png" alt="" id="BLOGGER_PHOTO_ID_5590958256554093026" border="0" /&gt;&lt;/a&gt; &lt;p style="margin-bottom: 0cm;"&gt;The second one was a bit more complicated, not so much, but a little bit. It was a “plain image generator”. I mean, imageplus was generating images as long as the client requested it. The images were in different colors depending on the position the cursor was pointing at. 
Thus, if the cursor was in the top left position, the square generated was different from the bottom right position.&lt;/p&gt;  &lt;p style="margin-bottom: 0cm;"&gt;I think that this example  makes imageplus overwork, because every time the cursor moves, a request is sent to the server, and then imageplus generates an image. If you want to pass from half a square to the other half, how many requests do you need to do? How many images are you generating?&lt;/p&gt;  &lt;p style="margin-bottom: 0cm;"&gt;Well, really this is a nonsense without any relevance because I was just testing how imageplus could generate an image and display it through the web.&lt;/p&gt;  &lt;p style="margin-bottom: 0cm;"&gt;Here are a couple of screenshots.&lt;/p&gt;&lt;br /&gt;&lt;p style="margin-bottom: 0cm;"&gt;&lt;a href="http://4.bp.blogspot.com/-Y_vzJWqPAo8/TZcSzWP1DKI/AAAAAAAAACM/INhsmAxWXQM/s1600/IPImage_blue.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 279px; height: 270px;" src="http://4.bp.blogspot.com/-Y_vzJWqPAo8/TZcSzWP1DKI/AAAAAAAAACM/INhsmAxWXQM/s320/IPImage_blue.png" alt="" id="BLOGGER_PHOTO_ID_5590958135758359714" border="0" /&gt;&lt;/a&gt;&lt;/p&gt; &lt;p style="margin-bottom: 0cm;"&gt;&lt;a href="http://3.bp.blogspot.com/-p9Wmk62O4NM/TZcSwQKYyVI/AAAAAAAAACE/LDBodgyZNdk/s1600/IPImage.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 269px; height: 270px;" src="http://3.bp.blogspot.com/-p9Wmk62O4NM/TZcSwQKYyVI/AAAAAAAAACE/LDBodgyZNdk/s320/IPImage.png" alt="" id="BLOGGER_PHOTO_ID_5590958082585315666" border="0" /&gt;&lt;/a&gt;&lt;/p&gt;  &lt;p style="margin-bottom: 0cm;"&gt;And now the last one. This example consists of an image in which you go with the cursor over it, and you get the red, green and blue average as well as the total average of the neighbour pixels of the pixel you are pointing.  
&lt;/p&gt;&lt;br /&gt;&lt;p style="margin-bottom: 0cm;"&gt;&lt;a href="http://3.bp.blogspot.com/-UA2SIZzRIpo/TZcTYoXX9kI/AAAAAAAAACc/HhN_Z76deTY/s1600/pixelaverage.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 300px; height: 320px;" src="http://3.bp.blogspot.com/-UA2SIZzRIpo/TZcTYoXX9kI/AAAAAAAAACc/HhN_Z76deTY/s320/pixelaverage.png" alt="" id="BLOGGER_PHOTO_ID_5590958776277005890" border="0" /&gt;&lt;/a&gt;&lt;/p&gt; &lt;p style="margin-bottom: 0cm;"&gt;&lt;br /&gt;&lt;/p&gt;  &lt;p style="margin-bottom: 0cm;"&gt;Although I'm going to explain this now, it also applies to the examples mentioned above.&lt;/p&gt; &lt;p style="margin-bottom: 0cm;"&gt;Imageplus uses a type of structure, or better said, a class to store the image, but Wt uses another type of class to store its image. So I needed to connect them somehow.&lt;/p&gt;  &lt;p style="margin-bottom: 0cm;"&gt;The Wt's class which can store an image, getting it in a low level,  pixel by pixel is called &lt;a href="http://www.webtoolkit.eu/wt/doc/reference/html/classWt_1_1WRasterImage.html"&gt;Wt::WRasterImage&lt;/a&gt;, then I have done a function which given an image from imageplus, it returns a pointer to WRasterImage filled with this image. This function will be the main connector to transmit images to the web, very useful.&lt;/p&gt;  &lt;p style="margin-bottom: 0cm;"&gt;Although Wt can process the pixels by accessing them and changing their value, it is not the goal we are aiming for. We want all the image and video processing to be done with imageplus.&lt;/p&gt;   &lt;p style="margin-bottom: 0cm;"&gt;Currently, I'm trying to merge the &lt;a href="http://www.webtoolkit.eu/wt/doc/reference/html/classWt_1_1WRasterImage.html"&gt;webpage I did&lt;/a&gt; last week with two of the demos, which seems a bit more complex, but not impossible. I'll explain you in the following articles!    
&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-8652883506852331486?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/8652883506852331486/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/04/integrating-wt-imageplus.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/8652883506852331486'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/8652883506852331486'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/04/integrating-wt-imageplus.html' title='Integrating Wt + Imageplus'/><author><name>Marcel</name><uri>http://www.blogger.com/profile/06357583442533980245</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/-9_BdmMnrXx4/TZcS6YPxqeI/AAAAAAAAACU/eBaEGCKv-bw/s72-c/pev.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-1192365909445195680</id><published>2011-04-02T12:54:00.006+02:00</published><updated>2011-04-02T13:43:03.794+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='object'/><category scheme='http://www.blogger.com/atom/ns#' term='detection'/><category scheme='http://www.blogger.com/atom/ns#' term='xavi'/><category scheme='http://www.blogger.com/atom/ns#' term='Laura'/><title type='text'>Multiscale object detection tested in Formula 1 sequences</title><content type='html'>The detection of 
objects at a local scale might be a hard processing task if every particular location in the image must be tested. In our research, we focus on the object retrieval and detection on &lt;a href="http://bitsearch.blogspot.com/2010/04/interactive-segmentation-studying.html"&gt;Binary Partition Trees&lt;/a&gt;, a hierarchical segmentation of the image that increases (in fact, duplicates) the basic problem of local analysis.&lt;br /&gt;&lt;br /&gt;In general, though, the context provided by the pixels of the image that do not represent the object provide valuable information that can simplify the local analysis. In our recent tests, we have considered a double-scale processing of images: at the global and the local scales.&lt;br /&gt;&lt;br /&gt;We have demonstrated this approach in the domain of Formula 1 sequences of image. Firstly, we &lt;a href="http://bitsearch.blogspot.com/2010/10/how-to-built-your-object-detector-in.html"&gt;have annotated&lt;/a&gt; with &lt;a href="http://gps-tsc.upc.es/imatge/i3media/gat/"&gt;GAT&lt;/a&gt; a reduced set of around ten diverse examples of every type of object that wants to be detected. Furthermore, we have also annotated as negative examples another set of images that do not contain the object.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://1.bp.blogspot.com/-J1D3tBtIaLA/TZcLKpeFk8I/AAAAAAAAAgk/PS3Vnt_EVZE/s1600/PositiveInstances.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 400px; height: 215px;" src="http://1.bp.blogspot.com/-J1D3tBtIaLA/TZcLKpeFk8I/AAAAAAAAAgk/PS3Vnt_EVZE/s400/PositiveInstances.png" alt="" id="BLOGGER_PHOTO_ID_5590949739962405826" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;The object detector algorithm has used this annotation to train two types of classifiers. 
Firstly, a classifier based on global scale features (color distribution and edge histogram) capable of discerning between those images that may contain an instance of the modeled object from those that do not.&lt;br /&gt;&lt;a href="http://4.bp.blogspot.com/--bJMPMpZMiQ/TZcJfQWVEQI/AAAAAAAAAgc/V23U-qbAhJU/s1600/local.jpg"&gt;&lt;br /&gt;&lt;/a&gt;&lt;a href="http://1.bp.blogspot.com/-e6IGjEa_xKA/TZcJfVTeU1I/AAAAAAAAAgU/VPewC8io0Ec/s1600/global.jpg"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 400px; height: 225px;" src="http://1.bp.blogspot.com/-e6IGjEa_xKA/TZcJfVTeU1I/AAAAAAAAAgU/VPewC8io0Ec/s400/global.jpg" alt="" id="BLOGGER_PHOTO_ID_5590947896303178578" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;In a second stage, those images marked as candidates have been analyzed at a local scale. The detection of the objects has been performed on the local features computed for every region defined by the Binary Partition Tree. In this case the detector extracted the object and showed it with an overlaid mask in green.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://4.bp.blogspot.com/--bJMPMpZMiQ/TZcJfQWVEQI/AAAAAAAAAgc/V23U-qbAhJU/s1600/local.jpg"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 400px; height: 230px;" src="http://4.bp.blogspot.com/--bJMPMpZMiQ/TZcJfQWVEQI/AAAAAAAAAgc/V23U-qbAhJU/s400/local.jpg" alt="" id="BLOGGER_PHOTO_ID_5590947894972977410" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;So far, no &lt;a href="http://bitsearch.blogspot.com/2011/01/temporal-stabilizer-in-bounding-boxes.html"&gt;temporal stabilization&lt;/a&gt; has been applied yet, so every frame has been treated completely independently. 
This experiment has allowed a faster processing of the complete sequence, as well as a reduction in the amount of false positives when compared with the case of only working at the global scale.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-1192365909445195680?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/1192365909445195680/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/04/multiscale-object-detection-tested-in.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/1192365909445195680'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/1192365909445195680'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/04/multiscale-object-detection-tested-in.html' title='Multiscale object detection tested in Formula 1 sequences'/><author><name>Xavi Giró-i-Nieto</name><uri>https://profiles.google.com/110271653716466055491</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh5.googleusercontent.com/--EaBZ1Bzzsg/AAAAAAAAAAI/AAAAAAAAAwU/4J6xIhZ3Xyo/s512-c/photo.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://1.bp.blogspot.com/-J1D3tBtIaLA/TZcLKpeFk8I/AAAAAAAAAgk/PS3Vnt_EVZE/s72-c/PositiveInstances.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-1992891063227701966</id><published>2011-03-28T22:34:00.011+02:00</published><updated>2011-04-06T12:47:37.424+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Eli'/><category 
scheme='http://www.blogger.com/atom/ns#' term='user interface'/><title type='text'>Web interface for shot type detection</title><content type='html'>&lt;div style="text-align: justify;"&gt;In this post I will explain the evolution of the web interface I have to develop for my bachelor thesis, which pursues semi-supervised key-frame shot type identification. This interface will be used to re-train a shot type classifier (already trained with manually labeled examples) for a given domain by validating or correcting its automatic detections and then adding them to the trainer to generate a new model. So for a given domain and an asset (group of keyframes of that domain) the interface will have to show the automatic shot type identification and provide the necessary tools for validating the automatic detections or correcting them if necessary.&lt;/div&gt;&lt;div style="text-align: left;"&gt;&lt;div style="text-align: left;"&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;After one month of work, two different layout proposals were presented to the Documentation Department at the CCMA in order to keep developing the interface with the structure that best suits their needs. The two proposals are the following:&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;&lt;span class="Apple-style-span"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;&lt;span class="Apple-style-span"&gt;1st Proposal:&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/-aGvRBHIh-2o/TZD6pdy-JeI/AAAAAAAAACI/9P2xoemqxxA/s1600/interf1.png"&gt;&lt;img 
src="http://2.bp.blogspot.com/-aGvRBHIh-2o/TZD6pdy-JeI/AAAAAAAAACI/9P2xoemqxxA/s400/interf1.png" alt="" id="BLOGGER_PHOTO_ID_5589242727846782434" style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 400px; height: 290px;" border="0" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;• For each domain we have different tabs for each shot type.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;• Each tab contains all the detected keyframes for that shot type.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;• The number of keyframes on each tab is fixed, the variable thing is the number of pages for each tab, which depend on the quantity of keyframes detected for that shot-type.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;• Keyframes have to be labeled as positive (detection is ok) or negative (detection is wrong)&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;• Besides labeling as negative a keyframe the information of the right shot-type has also to be provided.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;• Only one shot-type can be seen at once.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;• To open another asset the already opened one has to be closed, cannot manage working with different assets at the same time.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;&lt;span class="Apple-style-span"&gt;2nd 
Proposal:&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/-2IZf13TLWis/TZD6p4tAI2I/AAAAAAAAACQ/bh29JKitN_0/s1600/interf2.bmp"&gt;&lt;img src="http://2.bp.blogspot.com/-2IZf13TLWis/TZD6p4tAI2I/AAAAAAAAACQ/bh29JKitN_0/s400/interf2.bmp" alt="" id="BLOGGER_PHOTO_ID_5589242735069504354" style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 400px; height: 265px;" border="0" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;• For each domain we have different tabs for each asset, so simultaneous work with several assets is supported.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;• For each asset tab there are as many keyframe rows as shot types in that domain. So all shot types can be seen at once.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;• The number of columns of each row is fixed for all shot types. 
As not all shot types have the same number of keyframes detected and pagination is shared by all shot-types, some of the rows can run out of keyframes.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;• This layout is suitable for drag and drop functionalities to easily correct a keyframe shot-type detection, so there is no need to label any keyframe as negative, just drop it to the right place.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;• Once all keyframes are in the right place the page can be entirely validated.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;• As when validating there are examples of all shot types within a domain, at training the classifier would have a similar number of examples for each one so it will improve in a more homogeneous manner (a class will not be undertrained as can happen with the first proposal).&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;&lt;span class="Apple-style-span"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: left;"&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;&lt;span class="Apple-style-span"&gt;Current interface:&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;Because of its functionality and its visual component, the 2nd proposal was chosen by the CCMA with additional requirements such as showing the title and the detection score for each keyframe. 
This is its actual appearence before and after validation:&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-aWsL0Z7mOEA/TZD6qbP_3sI/AAAAAAAAACY/Remy_hwsxDA/s1600/interficieNoAnotat.bmp"&gt;&lt;img src="http://3.bp.blogspot.com/-aWsL0Z7mOEA/TZD6qbP_3sI/AAAAAAAAACY/Remy_hwsxDA/s400/interficieNoAnotat.bmp" alt="" id="BLOGGER_PHOTO_ID_5589242744343092930" style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 400px; height: 292px;" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/-a5w6B0ZSAD4/TZD6qm4IhUI/AAAAAAAAACg/hU01eCJEE9s/s1600/interficieAnotat.bmp"&gt;&lt;/a&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/-a5w6B0ZSAD4/TZD6qm4IhUI/AAAAAAAAACg/hU01eCJEE9s/s1600/interficieAnotat.bmp"&gt;&lt;img src="http://1.bp.blogspot.com/-a5w6B0ZSAD4/TZD6qm4IhUI/AAAAAAAAACg/hU01eCJEE9s/s400/interficieAnotat.bmp" alt="" id="BLOGGER_PHOTO_ID_5589242747464222018" style="text-align: justify; display: block; margin: 0px auto 10px; cursor: pointer; width: 400px; height: 292px;" border="0" /&gt;&lt;/a&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/-2IZf13TLWis/TZD6p4tAI2I/AAAAAAAAACQ/bh29JKitN_0/s1600/interf2.bmp"&gt;&lt;/a&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/-2IZf13TLWis/TZD6p4tAI2I/AAAAAAAAACQ/bh29JKitN_0/s1600/interf2.bmp"&gt;&lt;/a&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' 
src='https://blogger.googleusercontent.com/tracker/3250503894549245556-1992891063227701966?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/1992891063227701966/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/03/in-this-post-i-will-explain-evolution.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/1992891063227701966'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/1992891063227701966'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/03/in-this-post-i-will-explain-evolution.html' title='Web interface for shot type detection'/><author><name>Eli Carcel</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://2.bp.blogspot.com/-aGvRBHIh-2o/TZD6pdy-JeI/AAAAAAAAACI/9P2xoemqxxA/s72-c/interf1.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-7251462284469084798</id><published>2011-03-24T17:39:00.006+01:00</published><updated>2011-03-25T15:16:49.185+01:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Marcel'/><category scheme='http://www.blogger.com/atom/ns#' term='html'/><title type='text'>My first Wt C++ web page</title><content type='html'>The main goal of my project, is to show algorithms in a web page. If I want to do so, I obviously need to create a web page. But, what is the best way? 
There are a lot of technology behind web pages and web applications,&lt;a href="http://www.php.net/"&gt; PHP&lt;/a&gt;, &lt;a href="http://www.asp.net/"&gt;ASP.NET&lt;/a&gt;, &lt;a href="http://www.python.org/"&gt;Python&lt;/a&gt;, &lt;a href="http://ruby-lang.org/"&gt;ruby&lt;/a&gt;, etc.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://bitsearch.blogspot.com/2011/03/wt-web-programming-with-c.html"&gt;In my last post&lt;/a&gt; I spoke about &lt;a href="http://www.webtoolkit.eu/wt"&gt;Wt&lt;/a&gt;, a C++ library for web purposes. In addition, ImagePlus, which is also programmed in C++, is the library used by the&lt;a href="http://gps-tsc.upc.es/imatge/"&gt; Image and Video Processing Group&lt;/a&gt; from the Polytechnic University of Catalonia.&lt;br /&gt;&lt;br /&gt;What I'm doing is trying to make them easier to show their algorithms in a web page.&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://bluescripts.net/images/php.png"&gt;&lt;img style="float: right; margin: 0pt 0pt 10px 10px; cursor: pointer; width: 158px; height: 158px;" src="http://bluescripts.net/images/php.png" alt="" border="0" /&gt;&lt;/a&gt; Doing it in other language different from C++ would implicate someone would have to learn web based languages.&lt;br /&gt;&lt;br /&gt;Having said this, what I'm going to explain is that there is an important abstraction process between Wt and web technology. Wt programmers don't care about neither PHP nor any other web issues. So you can program whatever you want just using C++, which is very good for all C/C++ experienced  programmers because the need of learning a new language for the web disappears.&lt;br /&gt;&lt;br /&gt;And, what about HTML?&lt;br /&gt;I've been programming some time with PHP, and like it, what you are doing is writing the HTML code to be displayed in the client. But in different ways. 
In Wt you can create a new element such as a &lt;a href="http://www.webtoolkit.eu/wt/doc/reference/html/classWt_1_1WContainerWidget.html"&gt;WContainerWidget&lt;/a&gt;, and it's supposed that you have written a "div" element into a HTML web page. But the difference is that WContainerWidget is a class and you can use its methods in a very easy way.&lt;br /&gt;Conversely, in PHP you have to print all the page code as an string, so it's not so fine.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.amaronline.com/wp-content/uploads/css_logo.png"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 0pt; cursor: pointer; width: 200px; height: 180px;" src="http://www.amaronline.com/wp-content/uploads/css_logo.png" alt="" border="0" /&gt;&lt;/a&gt;And &lt;a href="http://www.w3schools.com/css/"&gt;CSS&lt;/a&gt;, How does Wt manage to put the style on?&lt;br /&gt;First of all, CSS means Cascading Style Sheets and its purpose is to split content from style. Wt allows to programmers to apply the style in two different possibilities. You can apply it in some methods directly from C++, or you can add an external CSS style-sheet with all the rules.&lt;br /&gt;Certainly, CSS is not a programming language, is most a descriptive language about how is the style of the page. Consequently, its syntax is very easy and it's not a problem.&lt;br /&gt;&lt;br /&gt;Well, and right now, I'm going to comment a bit my first web page example. I've done five classes which are:&lt;br /&gt;&lt;br /&gt;&lt;ul&gt;&lt;li&gt;GPIapp&lt;/li&gt;&lt;li&gt;GPIpage&lt;/li&gt;&lt;li&gt;GPIheader&lt;/li&gt;&lt;li&gt;GPIcontent&lt;/li&gt;&lt;li&gt;GPIfooter&lt;/li&gt;&lt;/ul&gt;&lt;br /&gt;All these classes except GPIapp are derived from WContainerWidget. In other words, these are “div” elements. GPIapp is used to launch the web page and set all the external style-sheets and features.&lt;br /&gt;&lt;br /&gt;GPIpage is the main container. Inside it, there are the rest of the classes. 
The GPIpage class is composed then of four private attributes, which are: the page itself, the header, the content and the footer.&lt;br /&gt;In the header we have the logo, a title, and a space in the right.&lt;br /&gt;&lt;br /&gt;The content is the perfect holder where the demos are going to be placed. So probably, it will have to become a bit more complex. Another point of abstraction, I think that it will be good stuff if there were a class called GPIdemos, and the content could accept one of those, then the complex code perhaps will be in this last class. Well, this another step, but it has to be thought correctly, so, let me think and I'll explain you it in next posts.&lt;br /&gt;&lt;br /&gt;This is my first web page example:&lt;br /&gt;&lt;br /&gt;&lt;a href="http://3.bp.blogspot.com/-pfRznzdVM7I/TYt2Zjpf2GI/AAAAAAAAAB8/aDd_3zTZARk/s1600/wp.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 498px; height: 499px;" src="http://3.bp.blogspot.com/-pfRznzdVM7I/TYt2Zjpf2GI/AAAAAAAAAB8/aDd_3zTZARk/s400/wp.png" alt="" id="BLOGGER_PHOTO_ID_5587689944121333858" border="0" /&gt;&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-7251462284469084798?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/7251462284469084798/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/03/my-first-wt-c-web-page.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/7251462284469084798'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/7251462284469084798'/><link rel='alternate' type='text/html' 
href='http://bitsearch.blogspot.com/2011/03/my-first-wt-c-web-page.html' title='My first Wt C++ web page'/><author><name>Marcel</name><uri>http://www.blogger.com/profile/06357583442533980245</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/-pfRznzdVM7I/TYt2Zjpf2GI/AAAAAAAAAB8/aDd_3zTZARk/s72-c/wp.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-3084092455143035145</id><published>2011-03-17T22:19:00.007+01:00</published><updated>2011-03-22T18:34:16.400+01:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='streaming'/><category scheme='http://www.blogger.com/atom/ns#' term='Marcel'/><category scheme='http://www.blogger.com/atom/ns#' term='html'/><title type='text'>Wt - Web programming with C++</title><content type='html'>&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://www-lacan.upc.edu/styling/data/logoUPC2048.gif"&gt;&lt;img style="margin: 0pt 0pt 10px 10px; float: right; cursor: pointer; width: 143px; height: 143px;" src="http://www-lacan.upc.edu/styling/data/logoUPC2048.gif" alt="" border="0" /&gt;&lt;/a&gt;Some time ago I was thinking in how to do a demonstration of a video algorithm for the &lt;a href="http://gps-tsc.upc.es/imatge/"&gt;UPC image processing group.&lt;/a&gt; The first idea was  to display the video output in a web interface. 
I certainly thought a lot around this issue, as well as investigated technologies such as &lt;a href="http://bitsearch.blogspot.com/2011/02/gstreamer-libraries-proposed-for.html"&gt;gstreamer&lt;/a&gt;, &lt;a href="http://bitsearch.blogspot.com/2011/02/displaying-image-processing-group.html"&gt;ffmpeg&lt;/a&gt;, etc.&lt;br /&gt;&lt;br /&gt;But really this is not the only way to demonstrate those algorithms. Being aware that not all algorithms output a video, that some of them output coordinates or data in general, it's easy to think that there is another way. What about data streaming?&lt;br /&gt;On the one hand, there is the advantage that we can forget for some time the encoding and multiplexing processes.&lt;br /&gt;On the other hand it's still streaming, so I ought to find out the way to do that, with a smaller amount of data in each transmission.&lt;br /&gt;&lt;br /&gt;With this last idea, my project became a bit different, the data now are not frames, the data are whatever, coordinates, names, etc. For this reason, I had to research how to send these data.&lt;br /&gt;In an image processing algorithm, it is not known when data are going to change, so we certainly need a requirement, data have to be pushed.&lt;br /&gt;I mean, imagine the server in one side, the client in the other side.&lt;br /&gt;Then the server &lt;span style="font-weight: bold;"&gt;just gives data&lt;/span&gt; to the client whenever it wants, and the client just collects this data and integrates it in the web page live  demonstration.&lt;br /&gt;&lt;br /&gt;Well, I think this is very easy to understand but not possible theoretically, and I'm going to explain why not. We are trying to send the data through the HTTP protocol. 
This is because the HTTP protocol is allowed by firewalls and we can send data without being blocked.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://jerome.jouvie.free.fr/images/Java/Network/Lessons/HTTP-Request.png"&gt;&lt;img style="margin: 0pt 10px 10px 0pt; float: left; cursor: pointer; width: 251px; height: 174px;" src="http://jerome.jouvie.free.fr/images/Java/Network/Lessons/HTTP-Request.png" alt="" border="0" /&gt;&lt;/a&gt;Then we have to think how &lt;a href="http://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol"&gt;HTTP&lt;/a&gt; works. The way it works is by sending a request to the server, typically a GET or POST request, and the server responds with the data.&lt;br /&gt;So, as you can see, negotiation is needed, we can't just push data. It will be incoherent.&lt;br /&gt;&lt;br /&gt;Nevertheless, there are some hacks, you can have a look at &lt;a href="http://en.wikipedia.org/wiki/Reverse_Ajax"&gt;reverse AJAX&lt;/a&gt;, which has a lot of names, such as &lt;a href="http://en.wikipedia.org/wiki/Comet_%28programming%29"&gt;comet programming&lt;/a&gt;, or a lot more.&lt;br /&gt;&lt;br /&gt;I'm not going to comment on all of them, but I'm going to mention the most important one,&lt;a href="http://en.wikipedia.org/wiki/Push_technology"&gt; long polling&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;This hack consists in doing a request (the client), and &lt;span style="font-weight: bold;"&gt;when the server has new data&lt;/span&gt;, then it responds to this request. When data arrive, a new request is sent. Thus, we can have a more efficient way than steady polling.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Actually, there is a library called &lt;a href="http://www.webtoolkit.eu/"&gt;Wt&lt;/a&gt; (pronounced "witty"), which is used exclusively for web programming, with the only difference that it is a&lt;a href="http://en.wikipedia.org/wiki/C%2B%2B"&gt; C++&lt;/a&gt; library. 
This library is very similar to the famous &lt;a href="http://en.wikipedia.org/wiki/Qt_%28framework%29"&gt;Qt C++ library&lt;/a&gt; which is used to create desktop interfaces.&lt;br /&gt;The library has a very good documentation, very good forums, and in general the impression that it give as, me, Albert and Xavi was a great impression. We couldn't believe that this library was able to do all what says in the documentation. It was really strange, if so, why is not it very known?&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.webtoolkit.eu/css/wt/wt_banner.jpg"&gt;&lt;img style="margin: 0px auto 10px; display: block; text-align: center; cursor: pointer; width: 442px; height: 95px;" src="http://www.webtoolkit.eu/css/wt/wt_banner.jpg" alt="" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;Our final answer was that is not very known because it is in C++.&lt;br /&gt;Nowadays, web programming is made by some technologies like &lt;a href="http://www.blogger.com/www.php.net/"&gt;PHP&lt;/a&gt;, &lt;a href="http://www.blogger.com/en.wikipedia.org/wiki/JavaScript"&gt;Javascript&lt;/a&gt;, &lt;a href="http://www.blogger.com/en.wikipedia.org/wiki/Ajax_%28programming%29"&gt;AJAX&lt;/a&gt;, ASP.NET, and so on.&lt;br /&gt;But Wt offers the possibility to C++ programmers to make a web page without any knowledge about all of this technology. Abstracting thus, these technologies just by using C++.&lt;br /&gt;&lt;br /&gt;Well, once said this, I'm going to mention my current trial applications.&lt;br /&gt;The first one has been a simply text input that when you key up, a message such “Hello unknown, how are you?” pops written down in the screen. When the name is known, it shows this name instead unknown.&lt;br /&gt;The background of this simple application is that every time you key up, there is a request. 
In addition, I've managed to learn how the style works in this library.&lt;br /&gt;Here I leave a screen-shot of my little application:&lt;br /&gt;&lt;br /&gt;&lt;a href="http://3.bp.blogspot.com/-wHYWFy93Puk/TYKBORt94qI/AAAAAAAAABs/5VGNSqfPAy0/s1600/demo1WebCpp.jpg"&gt;&lt;img style="margin: 0px auto 10px; display: block; text-align: center; cursor: pointer; width: 406px; height: 201px;" src="http://3.bp.blogspot.com/-wHYWFy93Puk/TYKBORt94qI/AAAAAAAAABs/5VGNSqfPAy0/s320/demo1WebCpp.jpg" alt="" id="BLOGGER_PHOTO_ID_5585168570167190178" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;This has been a good starting point, to realize how Wt could render a web page using C++. But after doing it, I've gotten my hands dirty doing a more complex server push, based on the example server push. Finally I've included another point to my second application. This is the painting.&lt;br /&gt;&lt;br /&gt;The second experiment has been to demonstrate the random algorithm from stdlib of C, by sending two random data, in a random time interval, and then the data are rendered by painting a little circle in the coordinates (random1,random2).&lt;br /&gt;&lt;br /&gt;&lt;a href="http://1.bp.blogspot.com/-6L9G-qyCgy4/TYKBTj76JGI/AAAAAAAAAB0/UKi7B5wE-B8/s1600/demoServerPus.jpg"&gt;&lt;img style="margin: 0px auto 10px; display: block; text-align: center; cursor: pointer; width: 297px; height: 246px;" src="http://1.bp.blogspot.com/-6L9G-qyCgy4/TYKBTj76JGI/AAAAAAAAAB0/UKi7B5wE-B8/s320/demoServerPus.jpg" alt="" id="BLOGGER_PHOTO_ID_5585168660956849250" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;I think that it is a good approach because I've already achieved to push data through the net. Now, it's time to connect my little application with ImagePlus, and take over the C random by the ImagePlus random. 
This will be my first ImagePlus algorithm demonstration.&lt;br /&gt;I expect to show you this in my next post, so, have a look!!&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-3084092455143035145?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/3084092455143035145/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/03/wt-web-programming-with-c.html#comment-form' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/3084092455143035145'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/3084092455143035145'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/03/wt-web-programming-with-c.html' title='Wt - Web programming with C++'/><author><name>Marcel</name><uri>http://www.blogger.com/profile/06357583442533980245</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/-wHYWFy93Puk/TYKBORt94qI/AAAAAAAAABs/5VGNSqfPAy0/s72-c/demo1WebCpp.jpg' height='72' width='72'/><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-6783182514331400126</id><published>2011-03-10T16:23:00.007+01:00</published><updated>2011-03-10T19:31:03.527+01:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Aida'/><category scheme='http://www.blogger.com/atom/ns#' term='retrieval'/><category scheme='http://www.blogger.com/atom/ns#' term='Relevance Feedback'/><category 
scheme='http://www.blogger.com/atom/ns#' term='xavi'/><category scheme='http://www.blogger.com/atom/ns#' term='thesis'/><title type='text'>Visual Search with Relevance Feedback based on Weights Updating</title><content type='html'>&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/-7VMoO0Aqhjg/TXkYj0Tv5JI/AAAAAAAAAgI/Z9MpdvG4ln8/s1600/Screen%2Bshot%2B2011-03-10%2Bat%2B19.24.46.png"&gt;&lt;img style="float: right; margin: 0pt 0pt 10px 10px; cursor: pointer; width: 220px; height: 320px;" src="http://2.bp.blogspot.com/-7VMoO0Aqhjg/TXkYj0Tv5JI/AAAAAAAAAgI/Z9MpdvG4ln8/s320/Screen%2Bshot%2B2011-03-10%2Bat%2B19.24.46.png" alt="" id="BLOGGER_PHOTO_ID_5582520216718402706" border="0" /&gt;&lt;/a&gt;A new Electrical Engineer in the world! Yesterday Aida successfully defended her Bachelor thesis at &lt;a href="http://www.etsetb.upc.es/en/"&gt;Telecom BCN&lt;/a&gt;. She presented her work &lt;a href="http://gps-tsc.upc.es/imatge/_Xgiro/teaching/thesis/2010-2011/AidaRubiano/memoria.pdf"&gt;"Visual Search with Relevance Feedback based on Weights Update"&lt;/a&gt; (in Spanish), the first research effort from our work in the field of user interaction for image retrieval. This publication represents a step further after the &lt;a href="http://bitsearch.blogspot.com/2010/03/brand-new-mpeg-7-search-engine-based-on.html"&gt;previous thesis by Carles Ventura&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;The thesis compares three different approaches to handle the relevance feedback of a user after a visual search. These three techniques use the information provided by the user when selecting which of the retrieved images is relevant for his/her query. 
The user interaction allows estimating which of the four available visual similarity criteria match better the user expectations.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="http://3.bp.blogspot.com/-92uaEzTW0YU/TXkYeM5raUI/AAAAAAAAAf4/TCFHc9M3zXU/s1600/foto.jpg"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 0pt; cursor: pointer; width: 320px; height: 239px;" src="http://3.bp.blogspot.com/-92uaEzTW0YU/TXkYeM5raUI/AAAAAAAAAf4/TCFHc9M3zXU/s320/foto.jpg" alt="" id="BLOGGER_PHOTO_ID_5582520120240728386" border="0" /&gt;&lt;/a&gt;The three basic approaches are based on previous work by &lt;a href="http://dx.doi.org/10.1109/76.718510"&gt;Rui&lt;/a&gt; (updates descriptor weights by increasing/decreasing their relative value), &lt;a href="http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.2.2057&amp;amp;rep=rep1&amp;amp;type=pdf"&gt;Taycher&lt;/a&gt; (updates according to the mean distance among relevant images) and &lt;a href="http://dx.doi.org/10.1109/EURCON.2005.1629878"&gt;Aksoy&lt;/a&gt; (updated according to the relation between the distance standard deviations of relevant and non-relevant images). 
Aida's work also studied some variations of these three techniques.&lt;br /&gt;&lt;br /&gt;Experimentation with the CCD database showed that Taycher's approach behaved best when &lt;a href="http://3.bp.blogspot.com/-m8Lsr-yuKqc/TXkYjlHddXI/AAAAAAAAAgA/2ByIVu76Q_Q/s1600/Screen%2Bshot%2B2011-03-10%2Bat%2B19.24.31.png"&gt;&lt;img style="float: right; margin: 0pt 0pt 10px 10px; cursor: pointer; width: 320px; height: 194px;" src="http://3.bp.blogspot.com/-m8Lsr-yuKqc/TXkYjlHddXI/AAAAAAAAAgA/2ByIVu76Q_Q/s320/Screen%2Bshot%2B2011-03-10%2Bat%2B19.24.31.png" alt="" id="BLOGGER_PHOTO_ID_5582520212640331122" border="0" /&gt;&lt;/a&gt;measuring performance with &lt;a href="http://bitsearch.blogspot.com/2010/03/retrieval-systems-evaluation.html"&gt;ANMRR and MAP&lt;/a&gt; and that, in general, those techniques that consider the results as a set rather than as individual items perform better. The results of the study also proved that most of the gain in interaction comes after the first and second iteration, considering in every feedback iteration the labeling of 20 results as relevant or non-relevant. In terms of visual descriptors, the MPEG-7 Color Structure was the best relevance estimator, followed by the Texture Edge Histogram, the Color Layout and the Dominant Color, all of them part of MPEG-7. 
Finally, the comparison between Aida's and Carles worked proved that the fusion of ranked list by distance average is better than the fusion on position.&lt;br /&gt;&lt;br /&gt;The future work includes integrating Taycher technique in GOS, our GUI for image retrieval, running further tests on a larger and more generic dataset and defining a more accurate weight update formulation in terms of a linear classifier.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-6783182514331400126?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/6783182514331400126/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/03/visual-search-with-relevance-feedback.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/6783182514331400126'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/6783182514331400126'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/03/visual-search-with-relevance-feedback.html' title='Visual Search with Relevance Feedback based on Weights Updating'/><author><name>Xavi Giró-i-Nieto</name><uri>https://profiles.google.com/110271653716466055491</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh5.googleusercontent.com/--EaBZ1Bzzsg/AAAAAAAAAAI/AAAAAAAAAwU/4J6xIhZ3Xyo/s512-c/photo.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://2.bp.blogspot.com/-7VMoO0Aqhjg/TXkYj0Tv5JI/AAAAAAAAAgI/Z9MpdvG4ln8/s72-c/Screen%2Bshot%2B2011-03-10%2Bat%2B19.24.46.png' height='72' 
width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-4924589942997486202</id><published>2011-03-09T13:33:00.005+01:00</published><updated>2011-03-22T12:37:18.956+01:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='streaming'/><category scheme='http://www.blogger.com/atom/ns#' term='Marcel'/><category scheme='http://www.blogger.com/atom/ns#' term='html'/><title type='text'>Streaming to an HTML5 web page</title><content type='html'>&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://cineytvonline.com/wp-content/uploads/2011/01/141streaming2.jpg"&gt;&lt;img style="float: right; margin: 0pt 0pt 10px 10px; cursor: pointer; width: 349px; height: 209px;" src="http://cineytvonline.com/wp-content/uploads/2011/01/141streaming2.jpg" alt="" border="0" /&gt;&lt;/a&gt;In some articles before this, I've been talking about some separated issues, such as &lt;a href="http://www.blogger.com/gstreamer.freedesktop.org/"&gt;Gstreamer&lt;/a&gt;, &lt;a href="http://www.blogger.com/www.videolan.org/vlc/"&gt;VLC&lt;/a&gt;, the &lt;a href="http://www.blogger.com/en.wikipedia.org/wiki/Hypertext_Transfer_Protocol"&gt;HTTP protocol&lt;/a&gt;, streaming techniques and &lt;a href="http://www.blogger.com/en.wikipedia.org/wiki/HTML5"&gt;HTML5&lt;/a&gt;.&lt;br /&gt;&lt;p&gt;Now, I've achieved in streaming against a web page. 
And I'm going to explain how the whole test has been.&lt;br /&gt;&lt;br /&gt;First of all, I succeeded in sending an HTTP stream with VLC, which has been a very important tool for my last discovery because it has allowed me to investigate how it works.&lt;/p&gt;&lt;br /&gt;Another import&lt;a href="http://www.linuxaria.com/wp-content/uploads/2010/09/wireshark.png"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 0pt; cursor: pointer; width: 200px; height: 200px;" src="http://www.linuxaria.com/wp-content/uploads/2010/09/wireshark.png" alt="" border="0" /&gt;&lt;/a&gt;ant tool to take account of is wireshark. &lt;a href="http://www.blogger.com/www.wireshark.org"&gt;Wireshark&lt;/a&gt; is a network software that allows you to look at and capture what is going through your network interfaces. So you can see all the packets and a lot of information about them, for instance, how the protocol stack is composed.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Besides, I have to reference you again to another article written by myself which explains the main architecture of Gstreamer, one of the most important frameworks for streaming purposes in this moment.&lt;p&gt;&lt;/p&gt;&lt;p&gt;Once said this, let's get our hands dirty, let's to streaming!&lt;br /&gt;Firstly, we have to think about who is going to do streaming, and how. Now we are assuming that we are in the server, and we have to build something like a Gstreamer pipeline.&lt;br /&gt;This pipeline has to cover some requirements.&lt;/p&gt;&lt;a href="http://1.bp.blogspot.com/_Ylt9MrZAUYw/SusTUSiSR1I/AAAAAAAAACg/nLv7hwBoa8w/s320/SMPTE-GStreamerQtPythonMacOSX-test.png"&gt;&lt;img style="float: right; margin: 0pt 0pt 10px 10px; cursor: pointer; width: 281px; height: 260px;" src="http://1.bp.blogspot.com/_Ylt9MrZAUYw/SusTUSiSR1I/AAAAAAAAACg/nLv7hwBoa8w/s320/SMPTE-GStreamerQtPythonMacOSX-test.png" alt="" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;It has to start with a well-known video file and format. 
It will be raw.&lt;br /&gt;We have to be aware of what encoding and multiplexing methods HTML5 supports.&lt;br /&gt;What does HTTP streaming mean in terms of the HTML5 video tag?&lt;br /&gt;Well, step by step. The first part is what to send. It is an early test, so we can send a gstreamer video pattern already designed for test purposes. Its name is &lt;span style="font-weight: bold;"&gt;“videotestsrc”&lt;/span&gt;.  We have got the first piece of the puzzle.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Now, our current worries are the encoding and multiplexing methods. Well, really, when I tested the VLC HTTP streaming some time ago, I used &lt;a href="http://www.theora.org/"&gt;Theora&lt;/a&gt; as the video encoding method, &lt;a href="http://www.vorbis.com"&gt;vorbis&lt;/a&gt; as the audio encoding method, and &lt;a href="http://en.wikipedia.org/wiki/Ogg"&gt;ogg&lt;/a&gt; as the multimedia container for multiplexing. It worked perfectly, therefore I'm going to use them to do this test. All of them except vorbis, because there is no audio here.&lt;br /&gt;&lt;a href="http://blog.haciendomedia.com/wp-content/uploads/WebM-300x161.png"&gt;&lt;img style="float: right; margin: 0pt 0pt 10px 10px; cursor: pointer; width: 300px; height: 161px;" src="http://blog.haciendomedia.com/wp-content/uploads/WebM-300x161.png" alt="" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;Then, I'm going to use theoraenc, the theora encoder, and oggmux, the ogg multiplexer. 
Of course in the future theora can be replaced by &lt;a href="http://www.blogger.com/en.wikipedia.org/wiki/H.264/MPEG-4_AVC"&gt;h.264&lt;/a&gt; or &lt;a href="http://www.blogger.com/en.wikipedia.org/wiki/VP8"&gt;VP8&lt;/a&gt; and ogg can be replaced by other containers like &lt;a href="http://www.blogger.com/www.webmproject.org/"&gt;webM&lt;/a&gt; or &lt;a href="http://www.blogger.com/www.matroska.org"&gt;mkv&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;And finally, the last and for me the most difficult part of this test, to send this coded and multiplexed video that we have got, through the red.&lt;br /&gt;Well, as you know internet is an end-to-end global net. It means that you can't broadcast like if It were a TV broadcasting. Ok, at this point, I did some tests in the way of how could send this data. I spent a lot of days investigating the HTTP protocol and how I could send this data through this protocol, like VLC does.&lt;br /&gt;The strange thing was that when I was capturing the traffic produced by the VLC streaming server, there was no traffic.&lt;br /&gt;&lt;p&gt;&lt;br /&gt;Then, in the other machine, opened my HTML 5 file, and then, suddenly a lot of HTTP and &lt;a href="http://www.blogger.com/en.wikipedia.org/wiki/Transmission_Control_Protocol"&gt;TCP&lt;/a&gt; traffic were produced.&lt;br /&gt;When you looked at the traffic, you could see the TCP handshake, and then a get request from the client to the server.&lt;br /&gt;It made me think that perhaps the video HTML5 tag manages to encapsulate the data in the HTTP protocol in order to be displayed in the web page, and we just have to send the data via TCP. And effectively, this worked.&lt;/p&gt;&lt;br /&gt;To sum up, in my &lt;a href="http://www.blogger.com/www.ubuntu.com"&gt;ubuntu&lt;/a&gt;, I executed:&lt;br /&gt;&lt;br /&gt;&lt;span style="font-family:courier new;"&gt;gst-launch -v videotestsrc ! theoraenc ! oggmux ! 
tcpserversink host=192.168.1.35 port=8080&lt;/span&gt;&lt;br /&gt;&lt;p&gt;&lt;br /&gt;If you have read the blog posts before, you'll know what is gst-launch, the -v is to verbose all the information, the ! sign is used to indicate that this elements are connected, and the las element is a TCP sender, which sends to this IP and to the 8080 port. This is running in the server's machine.&lt;/p&gt;&lt;a href="http://www.blogsdna.com/wp-content/uploads/2010/01/logo-wordmark-version.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 457px; height: 116px;" src="http://www.blogsdna.com/wp-content/uploads/2010/01/logo-wordmark-version.png" alt="" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;In the client machine I just have to open the HTML5 web page with a browser like &lt;a href="http://www.blogger.com/www.google.com/chrome"&gt;Google Chrome 8&lt;/a&gt; or &lt;a href="http://www.blogger.com/www.mozilla.com"&gt;Firefox 3.6&lt;/a&gt; which accept the HTML5 video tag.&lt;br /&gt;If you want to be sure that is being correctly sent, I recommend you to check the traffic in both points of the connection, in the client and server.&lt;br /&gt;&lt;br /&gt;Right now, if all has been right, you ought to be doing streaming to a web page using Gstreamer and HTML5&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-4924589942997486202?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/4924589942997486202/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/03/streaming-to-html5-web-page.html#comment-form' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/4924589942997486202'/><link rel='self' 
type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/4924589942997486202'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/03/streaming-to-html5-web-page.html' title='Streaming to an HTML5 web page'/><author><name>Marcel</name><uri>http://www.blogger.com/profile/06357583442533980245</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://1.bp.blogspot.com/_Ylt9MrZAUYw/SusTUSiSR1I/AAAAAAAAACg/nLv7hwBoa8w/s72-c/SMPTE-GStreamerQtPythonMacOSX-test.png' height='72' width='72'/><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-2698167037703991807</id><published>2011-03-04T11:43:00.008+01:00</published><updated>2011-04-28T10:43:29.273+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Monica'/><category scheme='http://www.blogger.com/atom/ns#' term='text'/><category scheme='http://www.blogger.com/atom/ns#' term='retrieval'/><title type='text'>Textual similarity graph</title><content type='html'>My goal for these two months was implement to a system that could be generated a textual &lt;a href="http://bitsearch.blogspot.com/2010/05/similarity-graph.html"&gt;similarity graph &lt;/a&gt;from the metadata associated to images.&lt;br /&gt;&lt;div style="TEXT-ALIGN: justify"&gt;&lt;br /&gt;We had only worked with the visual modality until now. In our case, four different visual descriptors from the MPEG-7 standard are considered: Color Structure, dominant Color, Color Layout and Texture Edge Histogram. But, we would like to work with multimodal systems to improve the results obtained from diverse algorithms such as the reranking algorithm.&lt;br /&gt;&lt;br /&gt;In this post I want to explain you the general scheme of the system. 
Basically, it consists in two steps. First one corresponds to the extractor of the text descriptor and the second one generates the textual similarity graph.&lt;br /&gt;&lt;div style="TEXT-ALIGN: justify"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/-Tx7i88CxIDs/TXDHikIx1QI/AAAAAAAAALQ/WF9REzguLdQ/s1600/esquema.png"&gt;&lt;img style="TEXT-ALIGN: center; MARGIN: 0px auto 10px; WIDTH: 400px; DISPLAY: block; HEIGHT: 219px; CURSOR: pointer" id="BLOGGER_PHOTO_ID_5580179334942020866" alt="" src="http://2.bp.blogspot.com/-Tx7i88CxIDs/TXDHikIx1QI/AAAAAAAAALQ/WF9REzguLdQ/s400/esquema.png" border="0" /&gt;&lt;/a&gt;&lt;span style="COLOR: rgb(51,204,255); FONT-WEIGHT: bold"&gt;Extractor&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;As well as the visual modality, we need a descriptor that could represent the metadata associated to the image. As I told in one of my last post, we calculate the text desciptors based on the &lt;a href="http://bitsearch.blogspot.com/2011/01/ranking-documents-based-on-tf-idf.html"&gt;TF-IDF weight&lt;/a&gt;.&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-nzEf8nA7-7g/TXDDVh33PkI/AAAAAAAAALI/y3Eq9PgfBrg/s1600/dictionary.png"&gt;&lt;img style="MARGIN: 0pt 0pt 10px 10px; WIDTH: 179px; FLOAT: right; HEIGHT: 145px; CURSOR: pointer" id="BLOGGER_PHOTO_ID_5580174712949390914" alt="" src="http://4.bp.blogspot.com/-nzEf8nA7-7g/TXDDVh33PkI/AAAAAAAAALI/y3Eq9PgfBrg/s320/dictionary.png" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;Previously to this step we have to build a dictionary. The dictionary contains the terms that are used to represent all the text descriptors, so all images are represented with the same terms. 
Moroever, each term have associated an index what indicates the position of this term in the text descriptor.&lt;br /&gt;&lt;br /&gt;&lt;span style="COLOR: rgb(51,204,255); FONT-WEIGHT: bold"&gt;Textual Similarity graph&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;Next step is determining the similarity between the different text descriptors to generate the textual SG. We use the cosine similarity to quantify the similarity between text descriptors, as I said in my &lt;a href="http://bitsearch.blogspot.com/2011/01/vector-space-model-for-scoring.html"&gt;last post&lt;/a&gt;. Finally, we obtain a similarity graph where the nodes are the metadata of the images and the edge are the similarity between their text descriptors.&lt;br /&gt;&lt;br /&gt;So now we can work with four visual similarity graph and a textual one. &lt;/div&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-2698167037703991807?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/2698167037703991807/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/03/textual-similarity-graph.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/2698167037703991807'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/2698167037703991807'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/03/textual-similarity-graph.html' title='Textual similarity graph'/><author><name>Monica</name><uri>http://www.blogger.com/profile/04558100039934227329</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' 
src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://2.bp.blogspot.com/-Tx7i88CxIDs/TXDHikIx1QI/AAAAAAAAALQ/WF9REzguLdQ/s72-c/esquema.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-7109892444224230561</id><published>2011-03-03T14:48:00.008+01:00</published><updated>2011-08-17T17:07:38.426+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Martí'/><category scheme='http://www.blogger.com/atom/ns#' term='HbbTv'/><title type='text'>Introducing Launcher HbbTV Application</title><content type='html'>&lt;div style="margin: 0px; color: rgb(0, 0, 0);"&gt;&lt;h3 id="internal-source-marker_0.10123180295340717"   style=";font-family:'Times New Roman';font-size:medium;"&gt;&lt;span class="Apple-style-span" style="white-space: pre-wrap;font-family:Arial;font-size:15px;"  &gt;Context&lt;/span&gt;&lt;/h3&gt; &lt;h3   style="font-weight: normal;font-family:'Times New Roman';font-size:medium;"&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;As I explained in the previous post, I'm working on developing applications for &lt;a href="http://bitsearch.blogspot.com/2011/03/hybrid-broadcast-broadband-tv.html"&gt;HbbTV standard&lt;/a&gt;.  
&lt;/span&gt;&lt;/h3&gt;&lt;h3 id="internal-source-marker_0.10123180295340717" style=";font-family:'Times New Roman';font-size:medium;"&gt;&lt;br /&gt;&lt;/h3&gt;&lt;h3 id="internal-source-marker_0.10123180295340717"   style=";font-family:'Times New Roman';font-size:medium;"&gt;&lt;span class="Apple-style-span" style="white-space: pre-wrap;font-family:Arial;font-size:15px;"  &gt;What’s a launcher application?&lt;/span&gt;&lt;/h3&gt;&lt;h3   style="font-weight: normal;font-family:'Times New Roman';font-size:medium;"&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;I have to develop a launcher application. The function of this application is to show to the user all the applications available and to start them when user wants.&lt;/span&gt;&lt;/h3&gt;&lt;h3   style="font-weight: normal;font-family:'Times New Roman';font-size:medium;"&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;This is an example of a mobile launcher application.&lt;/span&gt;&lt;/h3&gt;&lt;h3   style=";font-family:'Times New Roman';font-size:medium;"&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;img src="https://lh5.googleusercontent.com/SzHlTEqb_uGj1LqD9EXLVYchDQ2Gq0cbCOLuAEsjeJdHY52ji6wWOw9zIj_kkD56G0VR5lC8mv7GEDIWHl4HSRg4jCB5_bG0L5AjdKoTdeCJFkv0peI" height="276px;" width="165px;" /&gt;&lt;/h3&gt;&lt;span class="Apple-style-span"  style="font-size:11pt;"&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;" 
 &gt;For now, I have started to try to understand some of &lt;/span&gt;&lt;a href="http://en.wikipedia.org/wiki/JavaScript" style="font-family: 'Times New Roman'; font-size: medium;"&gt;&lt;span style="font-style: normal; text-decoration: underline; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;JavaScript&lt;/span&gt;&lt;/a&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt; files and their interaction with &lt;/span&gt;&lt;a href="http://en.wikipedia.org/wiki/HTML" style="font-family: 'Times New Roman'; font-size: medium;"&gt;&lt;span style="font-style: normal; text-decoration: underline; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;HTML&lt;/span&gt;&lt;/a&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt; and &lt;/span&gt;&lt;a href="http://en.wikipedia.org/wiki/Cascading_Style_Sheets" style="font-family: 'Times New Roman'; font-size: medium;"&gt;&lt;span style="font-style: normal; text-decoration: underline; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;CSS&lt;/span&gt;&lt;/a&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;. 
&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span"  style="font-size:11pt;"&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;I have done some tests:&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt; &lt;/span&gt;- I created an interface to show applications&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;&lt;span class="Apple-tab-span" style="white-space: pre;"&gt; &lt;/span&gt;- I tried to start another application&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span"  style="font-size:11pt;"&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;I try my programs with a web browser like &lt;/span&gt;&lt;a href="http://www.mozilla.com/es-ES/firefox/" style="font-family: 'Times New Roman'; font-size: medium;"&gt;&lt;span style="font-style: normal; text-decoration: underline; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;Mozilla Firefox&lt;/span&gt;&lt;/a&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;, and I have also tested with two different 
set-top boxes. &lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span"  style="font-size:11pt;"&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;The graphic structure of my test application is composed by:&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span"  style="font-size:11pt;"&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap; color: rgb(0, 0, 0);font-family:Arial;font-size:11pt;"  &gt;- Background&lt;/span&gt;&lt;/div&gt;&lt;div    style="margin: 0px;font-family:'Times New Roman';font-size:medium;color:transparent;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div    style="margin: 0px;font-family:'Times New Roman';font-size:medium;color:transparent;"&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;img src="https://lh5.googleusercontent.com/yLJTVzLSMGmfX5r5JIUJOSyiSaZybbONn5Y4N68pDq9q4OeVdswTTUSxRujJMq29k9Q0gSzYxpYe8rjNGUsF14FUc2I-LS5fUA61Ur4JKR8_-QWte1Y" height="175px;" width="314px;" /&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap; color: rgb(0, 0, 0);font-family:Arial;font-size:11pt;"  &gt;- Icons&lt;/span&gt;&lt;/div&gt;&lt;div style="margin: 0px;font-family:'Times New 
Roman';font-size:medium;color:transparent;"&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;img src="https://lh6.googleusercontent.com/z4aXlZDM56_f5FiQp9L7rBZXDHSMBBoqPzC4EooNLZeUiDo1sEJx2NSh2dC3wYqm6DGqq-v1Rpu736l-Ro0Hn5hVs6eWV34I8Op_RwxycRjncwcULDc" height="109px;" width="450px;" /&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap; color: rgb(0, 0, 0);font-family:Arial;font-size:11pt;"  &gt;- Focus&lt;/span&gt;&lt;/div&gt;&lt;div   style="margin: 0px; color: rgb(0, 0, 0);font-family:'Times New Roman';font-size:medium;"&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;img src="https://lh5.googleusercontent.com/q2t1RRuV2LwHFJ2rqfc0tef3zLiW94J0s5J7BcdqWPMkdZ7wLNpFM4eJCCouwHxHLcrH5DwJuc_-CmYAvBUU2SPV9YtU7Z-yrriiQF2UCBWJg2jMdl4" height="109px;" width="450px;" /&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;When I press the arrows in the set-top box remote control, the focus changes. In this way, I can see the app that is going to be selected. But I can not move the focus, because is very hard to be decoded by the set-top box. There must be 4 focus, and always be hiding and showing the right one.  
&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;This is a photograph of my first test.&lt;/span&gt;&lt;/div&gt;&lt;div face="'Times New Roman'" size="medium" color="transparent" style="margin: 0px;"&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div    style="margin: 0px;   font-family:'Times New Roman';font-size:medium;color:transparent;"&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt; &lt;/span&gt;&lt;br /&gt;&lt;span style="font-style: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;font-family:Arial;font-size:11pt;"  &gt;&lt;/span&gt;&lt;img src="https://lh4.googleusercontent.com/lo2mRAny071K2v-pw9_WSqX8Aw-JTMgAiq6i9FeEjVC396Ng3FS52p2CmpJ-B5iiQHhCVcs5MnVSRWxtQT9cjRAD0b1wZHD7YiNU-YmhAHkMr-Eyx9I" height="320px;" width="484px;" /&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-7109892444224230561?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/7109892444224230561/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/03/introducing-launcher-hbbtv-application.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' 
href='http://www.blogger.com/feeds/3250503894549245556/posts/default/7109892444224230561'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/7109892444224230561'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/03/introducing-launcher-hbbtv-application.html' title='Introducing Launcher HbbTV Application'/><author><name>Martí Alcon</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-4243130117244862400</id><published>2011-03-01T17:49:00.007+01:00</published><updated>2011-03-03T06:19:26.825+01:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='HTTP'/><category scheme='http://www.blogger.com/atom/ns#' term='Marcel'/><title type='text'>Going through the HTTP protocol</title><content type='html'>When you are looking up a website, a lot of things happen. One of them is related with the HTTP protocol.&lt;br /&gt;&lt;p&gt;Let's talk about HTTP. 
First of all, HTTP is a protocol created by the &lt;a href="http://www.w3.org/"&gt;World Wide Web Consortium&lt;/a&gt; and the &lt;a href="http://www.ietf.org/"&gt;Internet Engineering Task Force&lt;/a&gt; in order to allow all the elements that make up a web architecture to communicate with each other.&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/-voXXSl4q770/TW0myokDZ1I/AAAAAAAAABc/ZAsPkRvGZe8/s1600/112944254_3f5212215a.jpg"&gt;&lt;img style="margin: 0pt 0pt 10px 10px; float: right; cursor: pointer; width: 320px; height: 178px;" src="http://1.bp.blogspot.com/-voXXSl4q770/TW0myokDZ1I/AAAAAAAAABc/ZAsPkRvGZe8/s320/112944254_3f5212215a.jpg" alt="" id="BLOGGER_PHOTO_ID_5579158164705863506" border="0" /&gt;&lt;/a&gt;&lt;/p&gt;And then, how does it work?&lt;br /&gt;&lt;p&gt;Certainly, it is not a very difficult protocol; it's based on a request-reply scheme. This means that in a communication, first, the client sends a request, and then the server replies, fulfilling it as well as possible.&lt;/p&gt;It's important to highlight that it's a stateless protocol: it doesn't keep any state. This limitation was the main reason for the introduction of &lt;a href="http://es.wikipedia.org/wiki/Cookie"&gt;cookies&lt;/a&gt; in web browsers, to keep state in the client.&lt;br /&gt;&lt;p&gt;In the request, the client normally asks for some information, and the server gives it back. In the situation mentioned above, looking up a web page, the server returns the whole web page in the reply.&lt;a href="http://etutorials.org/shared/images/tutorials/tutorial_37/02fig02.gif"&gt;&lt;img style="margin: 0pt 0pt 10px 10px; float: right; cursor: pointer; width: 330px; height: 173px;" src="http://etutorials.org/shared/images/tutorials/tutorial_37/02fig02.gif" alt="" border="0" /&gt;&lt;/a&gt;&lt;/p&gt;But HTTP is not only used to transmit web pages. 
It can be used for transmitting some attributes and values which can launch a &lt;a href="http://en.wikipedia.org/wiki/Common_Gateway_Interface"&gt;CGI script&lt;/a&gt; in the server, for example.&lt;p&gt;Furthermore, and this is the most important part for my project, HTTP is also used to send video, avoiding firewalls. Therefore, we could send video packets through the web, and receive them with an &lt;a href="http://en.wikipedia.org/wiki/HTML5"&gt;HTML 5&lt;/a&gt; interface.&lt;/p&gt;But well, focusing on practice, I'm going to show a real example of HTTP communication.&lt;br /&gt;&lt;p&gt;Once a connection has been opened at the server's URI and port (e.g. URI: http://www.example.com/index.html, port: 80), the client sends this message to the server.&lt;/p&gt;&lt;br /&gt;&lt;span style="font-family:courier new;"&gt;GET /index.html HTTP/1.1&lt;/span&gt;&lt;br /&gt;&lt;p&gt;&lt;span style="font-family:courier new;"&gt;Host: www.example.com&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="font-family:courier new;"&gt;User-Agent: name-client&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="font-family:courier new;"&gt;[Blank line]&lt;/span&gt;&lt;br /&gt;&lt;/p&gt;&lt;p&gt;Then, the server replies with:&lt;br /&gt;&lt;/p&gt;&lt;p&gt;&lt;br /&gt;&lt;span style="font-family:courier new;"&gt;HTTP/1.1 200 OK&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="font-family:courier new;"&gt;Date: Fri, 31 Dec 2003 23:59:59 GMT&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="font-family:courier new;"&gt;Content-Type: text/html&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="font-family:courier new;"&gt;Content-Length: 1221&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="font-family:courier new;"&gt;[...CONTENT...]&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;As you can see, the server answers that the request has succeeded: the reply carries HTTP version 1.1, the “OK” message, and the 200 code, which is a status code. 
Here is the &lt;a href="http://en.wikipedia.org/wiki/List_of_HTTP_status_codes"&gt;list of status codes&lt;/a&gt;.&lt;/p&gt;&lt;p&gt;&lt;br /&gt;Moreover, the date is in the reply, always in GMT, and also the content type, which refers to what &lt;a href="http://en.wikipedia.org/wiki/MIME"&gt;MIME type&lt;/a&gt; the data is, and finally, the size of this set of information.&lt;br /&gt;The HTTP packet can have some attributes that can make the protocol work in some different ways. For example, there is an attribute, “content-encoding”, which explains how the data are encoded, or an authentication via HTTP.&lt;/p&gt;&lt;p&gt;Here you can see the whole &lt;a href="http://en.wikipedia.org/wiki/List_of_HTTP_header_fields"&gt;list of attributes&lt;/a&gt; that client and server can request or reply respectively.&lt;/p&gt;&lt;p&gt;As an example, I'm going to show you HTTP headers of a traffic capture that I've done with &lt;a href="http://www.wireshark.org/"&gt;Wireshark&lt;/a&gt; while I was streaming a video with&lt;a href="http://www.videolan.org/"&gt; VLC(Videolan) &lt;/a&gt;and capturing it with a HTML 5 interface.&lt;/p&gt;&lt;p&gt;&lt;br /&gt;&lt;/p&gt;&lt;p&gt;&lt;a href="http://3.bp.blogspot.com/-92JLxC4jybk/TW0pGLsju7I/AAAAAAAAABk/89gM6XF5bug/s1600/Screenshot.png"&gt;&lt;img style="margin: 0px auto 10px; display: block; text-align: center; cursor: pointer; width: 440px; height: 297px;" src="http://3.bp.blogspot.com/-92JLxC4jybk/TW0pGLsju7I/AAAAAAAAABk/89gM6XF5bug/s400/Screenshot.png" alt="" id="BLOGGER_PHOTO_ID_5579160699577547698" border="0" /&gt;&lt;/a&gt;&lt;/p&gt;&lt;p&gt;&lt;br /&gt;&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' 
src='https://blogger.googleusercontent.com/tracker/3250503894549245556-4243130117244862400?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/4243130117244862400/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/03/going-throught-http-protocol.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/4243130117244862400'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/4243130117244862400'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/03/going-throught-http-protocol.html' title='Going through the HTTP protocol'/><author><name>Marcel</name><uri>http://www.blogger.com/profile/06357583442533980245</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://1.bp.blogspot.com/-voXXSl4q770/TW0myokDZ1I/AAAAAAAAABc/ZAsPkRvGZe8/s72-c/112944254_3f5212215a.jpg' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-435559544337910322</id><published>2011-03-01T09:33:00.007+01:00</published><updated>2011-03-03T10:11:56.863+01:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Martí'/><category scheme='http://www.blogger.com/atom/ns#' term='HbbTv'/><title type='text'>Hybrid Broadcast-Broadband TV</title><content type='html'>&lt;b  style="font-family:arial;"&gt;&lt;span style=""&gt;Presentation&lt;/span&gt;&lt;/b&gt;  &lt;p  class="Standard" style="font-family:arial;"&gt;&lt;span style=""&gt;I am &lt;a 
href="http://www.blogger.com/profile/14456178522106737837"&gt;Martí Alcon&lt;/a&gt;, a student who's working on his diploma thesis (Projecte Final de Carrera)&lt;/span&gt;&lt;span style=""&gt; with &lt;a href="http://www.blogger.com/profile/07884429360374984166"&gt;Xavier Giró&lt;/a&gt;. &lt;/span&gt;&lt;/p&gt;    &lt;p  class="Standard" style="font-family:arial;"&gt;&lt;span style=""&gt;My project is about the development of a launcher application to use it in an interactive television environment supported by the &lt;a href="http://www.hbbtv.org/"&gt;HbbTV standard&lt;/a&gt;.&lt;/span&gt;&lt;/p&gt;&lt;p  class="Standard" style="font-family:arial;"&gt;&lt;span style=""&gt;I'm working with &lt;a href="http://www.activamultimedia.com/am/v_portal/apartados/pl_home.php?te=59&amp;amp;idm=2"&gt;Activa Multimèdia&lt;/a&gt; like &lt;a href="http://bitsearch.blogspot.com/search/label/Cristina"&gt;Cristina&lt;/a&gt; and &lt;a href="http://bitsearch.blogspot.com/search/label/Manel"&gt;Manel&lt;/a&gt;. Cristina worked on adapting &lt;a href="http://twitter.com/"&gt;Twitter&lt;/a&gt; for HbbTV applications. However, my project tries to develop a launcher, to start other apps. 
&lt;br /&gt;&lt;/span&gt;&lt;/p&gt;    &lt;p  class="Standard" style="font-family:arial;"&gt;&lt;span style=""&gt;So this post, talks about the environment of my project: Hybrid Broadcast-Broadband TV.&lt;br /&gt;&lt;/span&gt;&lt;/p&gt;&lt;p  class="Standard" style="font-family:arial;"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-RNC4RkVpTUc/TWzADufT5DI/AAAAAAAAAEY/mNbqH8rL2io/s1600/hbbtv-logo_source.png"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 0pt; cursor: pointer; width: 320px; height: 151px;" src="http://4.bp.blogspot.com/-RNC4RkVpTUc/TWzADufT5DI/AAAAAAAAAEY/mNbqH8rL2io/s320/hbbtv-logo_source.png" alt="" id="BLOGGER_PHOTO_ID_5579045208656700466" border="0" /&gt;&lt;/a&gt;&lt;/p&gt;&lt;p  class="Standard" style="font-family:arial;"&gt;&lt;br /&gt;&lt;/p&gt;&lt;p  class="Standard" style="font-family:arial;"&gt;&lt;br /&gt;&lt;/p&gt;&lt;p  class="Standard" style="font-family:arial;"&gt;&lt;br /&gt;&lt;/p&gt;&lt;p  class="Standard" style="font-family:arial;"&gt;&lt;br /&gt;&lt;/p&gt;&lt;p  class="Standard" style="font-family:arial;"&gt;&lt;br /&gt;&lt;/p&gt;  &lt;p face="arial" class="Standard"&gt;&lt;span style=""&gt; &lt;/span&gt;&lt;/p&gt;  &lt;p face="arial" class="Standard"&gt;&lt;span style=""&gt; &lt;/span&gt;&lt;/p&gt;  &lt;p face="arial" class="Standard"&gt;&lt;span style=""&gt; &lt;/span&gt;&lt;/p&gt;  &lt;p face="arial" class="Standard"&gt;&lt;b&gt;&lt;span style=""&gt;&lt;br /&gt;&lt;/span&gt;&lt;/b&gt;&lt;/p&gt;&lt;p face="arial" class="Standard"&gt;&lt;b&gt;&lt;span style=""&gt;What is HbbTV?&lt;/span&gt;&lt;/b&gt;&lt;/p&gt;  &lt;p face="arial" class="Standard"&gt;&lt;span style=""&gt;HbbTV is a standard of hybrid television, that's formed by a specification that tries to regulate the emissions of contents via terrestrial and IP. 
&lt;/span&gt;&lt;/p&gt;    &lt;p style="font-family: arial;" class="Standard"&gt;&lt;span style=""&gt;Usually, we are talking about receiving contents using DTV (Digital TV) or IP, but we can also receive them using cable or satellite. &lt;/span&gt;&lt;/p&gt;  &lt;p style="font-family: arial;" class="Standard"&gt;&lt;span style=""&gt;HbbTV meets specifications of other standards such as &lt;a href="http://www.openiptvforum.org/"&gt;OIPFTV&lt;/a&gt;, &lt;a href="http://www.ce.org/"&gt;CEA&lt;/a&gt;, &lt;a href="http://www.dvb.org/"&gt;DVB&lt;/a&gt; and &lt;a href="http://www.w3c.es/"&gt;W3C&lt;/a&gt;.&lt;/span&gt; &lt;/p&gt;  &lt;p style="font-family: arial;" class="Standard"&gt;&lt;span style=""&gt; &lt;/span&gt;&lt;/p&gt;  &lt;p style="font-family: arial;" class="Standard"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/-7m8UQmMK6VQ/TWy_1J_PWBI/AAAAAAAAAEQ/ihXvq-3173I/s1600/image.bmp"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 320px; height: 248px;" src="http://2.bp.blogspot.com/-7m8UQmMK6VQ/TWy_1J_PWBI/AAAAAAAAAEQ/ihXvq-3173I/s320/image.bmp" alt="" id="BLOGGER_PHOTO_ID_5579044958340339730" border="0" /&gt;&lt;/a&gt;&lt;/p&gt;&lt;p style="font-family: arial;" class="Standard"&gt;&lt;span style=""&gt;The programming languages of the HbbTV applications are:&lt;/span&gt;&lt;/p&gt;  &lt;p style="font-family: arial;" class="Standard"&gt;&lt;span style=""&gt;XHTML or php, CSS and JavaScript.&lt;/span&gt;&lt;/p&gt;&lt;p style="font-family: arial;" class="Standard"&gt;&lt;span style=""&gt;AJAX is a web development technique also used in HbbTV applications.&lt;br /&gt;&lt;/span&gt;&lt;/p&gt;&lt;span style=";font-family:&amp;quot;;" &gt;&lt;/span&gt;&lt;br /&gt;&lt;p style="font-family: arial;" class="Standard"&gt;Broadcasters can use HbbTV applications to create&lt;span style=""&gt; catch-up services, video on demand (VOD), menus with related videos, search 
engines...&lt;/span&gt;&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-435559544337910322?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/435559544337910322/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/03/hybrid-broadcast-broadband-tv.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/435559544337910322'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/435559544337910322'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/03/hybrid-broadcast-broadband-tv.html' title='Hybrid Broadcast-Broadband TV'/><author><name>Martí Alcon</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/-RNC4RkVpTUc/TWzADufT5DI/AAAAAAAAAEY/mNbqH8rL2io/s72-c/hbbtv-logo_source.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-6311445401108192456</id><published>2011-02-28T16:30:00.007+01:00</published><updated>2011-03-05T08:58:15.241+01:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Monica'/><category scheme='http://www.blogger.com/atom/ns#' term='thesis'/><title type='text'>Conclusions of my thesis</title><content type='html'>&lt;div style="text-align: justify;"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" 
href="http://2.bp.blogspot.com/-79h1i3eaxck/TXADlfhWrkI/AAAAAAAAAK4/t61cRdPr2P8/s1600/9.jpg"&gt;&lt;img style="margin: 0pt 0pt 10px 10px; width: 200px; float: right; height: 149px; cursor: pointer;" id="BLOGGER_PHOTO_ID_5579963880963485250" alt="" src="http://2.bp.blogspot.com/-79h1i3eaxck/TXADlfhWrkI/AAAAAAAAAK4/t61cRdPr2P8/s200/9.jpg" border="0" /&gt;&lt;/a&gt;I made the oral presentation of my bachelor thesis "&lt;a href="http://upcommons.upc.edu/pfc/handle/2099.1/11106"&gt;Reordenació i agrupament d'imatges resultants d'una cerca de vídeo&lt;/a&gt;" &lt;span style="font-style: italic;"&gt;(Reranking and clustering images from a video search system) &lt;/span&gt;a month ago. You can view it in &lt;a href="http://www.youtube.com/watch?v=red4J9QjFAg"&gt;youtube &lt;/a&gt;in Catalan. This project has given me much knowledge both on a personal level and in terms of career. On the one hand, I learned different programming languages such as Java and GWT, as well as provide knowledge about the state of the art of the retrieval systems. On the other hand, the fact of carrying out the bachelor thesis in a company has given me an overview of working world that we will live in.&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;Furthermore, we presented a paper about the reranking algorithm that I told in this &lt;a href="http://bitsearch.blogspot.com/2010/07/scheme-of-reranking-results-for-video.html"&gt;blog&lt;/a&gt;, to the congress &lt;a href="http://www.icmr2011.org/"&gt;ACM ICMR 2011&lt;/a&gt; that is going to celebrate in Trento. Finally, our paper has been accepted and is scheduled for a poster session. The final acceptance rate is only 35%.&lt;br /&gt;&lt;br /&gt;In conclusion, I'm very glad with the final result of my project! 
&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-6311445401108192456?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/6311445401108192456/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/02/conclusions-of-my-thesis.html#comment-form' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/6311445401108192456'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/6311445401108192456'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/02/conclusions-of-my-thesis.html' title='Conclusions of my thesis'/><author><name>Monica</name><uri>http://www.blogger.com/profile/04558100039934227329</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://2.bp.blogspot.com/-79h1i3eaxck/TXADlfhWrkI/AAAAAAAAAK4/t61cRdPr2P8/s72-c/9.jpg' height='72' width='72'/><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-1414240347763306272</id><published>2011-02-24T21:33:00.010+01:00</published><updated>2011-02-24T22:39:44.582+01:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Eli'/><category scheme='http://www.blogger.com/atom/ns#' term='classification'/><category scheme='http://www.blogger.com/atom/ns#' term='measures'/><title type='text'>Measures out of a confusion matrix</title><content type='html'>A &lt;a href="http://en.wikipedia.org/wiki/Confusion_matrix"&gt;confusion matrix&lt;/a&gt; is used in 
&lt;a href="http://en.wikipedia.org/wiki/Supervised_learning"&gt;supervised learning&lt;/a&gt; for comparing the outcome classification of an item with the desired classification. Each row of the matrix represents the instances that have been automatically predicted in a class, while each column represents the hand-labeled instances in a class. This matrix is useful for observing which classes have misidentified items as other classes.&lt;br /&gt;&lt;br /&gt;&lt;table border="1" cellpadding="2" cellspacing="0"&gt;&lt;tbody&gt;&lt;tr&gt;&lt;/tr&gt;&lt;tr&gt;&lt;th&gt;&lt;br /&gt;&lt;/th&gt; &lt;th&gt;&lt;br /&gt;&lt;/th&gt; &lt;th colspan="3"&gt;Hand-labeled&lt;/th&gt; &lt;/tr&gt; &lt;tr&gt; &lt;th&gt;&lt;br /&gt;&lt;/th&gt; &lt;th&gt;&lt;br /&gt;&lt;/th&gt; &lt;th&gt;Class1&lt;br /&gt;&lt;/th&gt; &lt;th style="text-align: left;"&gt;Class2&lt;/th&gt; &lt;th&gt;Class3&lt;br /&gt;&lt;/th&gt; &lt;/tr&gt; &lt;tr&gt; &lt;th rowspan="3"&gt;Automatic&lt;/th&gt; &lt;th&gt;Class1&lt;/th&gt; &lt;td&gt;10&lt;br /&gt;&lt;/td&gt; &lt;td&gt;3&lt;/td&gt; &lt;td&gt;0&lt;/td&gt; &lt;/tr&gt; &lt;tr&gt; &lt;th&gt;Class2&lt;/th&gt; &lt;td&gt;2&lt;/td&gt; &lt;td&gt;8&lt;br /&gt;&lt;/td&gt; &lt;td&gt;1&lt;/td&gt; &lt;/tr&gt; &lt;tr&gt; &lt;th&gt;Class3&lt;/th&gt; &lt;td&gt;0&lt;/td&gt; &lt;td&gt;2&lt;/td&gt; &lt;td&gt;11&lt;/td&gt;&lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-n4g4qz3JYcM/TWbJawaj8jI/AAAAAAAAABA/6rJx1iVHtZ4/s1600/FB.jpg"&gt;&lt;br /&gt;&lt;/a&gt;&lt;span style="font-size:130%;"&gt;&lt;br /&gt;&lt;a href="http://en.wikipedia.org/wiki/Precision_and_recall"&gt;Precision and recall&lt;/a&gt;:&lt;/span&gt;&lt;br /&gt;Precision or positive predictive value is a measure of the ability of a system to present only relevant instances. 
It measures the exactness or fidelity of the system.&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/-biNeDw1sVyw/TWbKI9_ljSI/AAAAAAAAABQ/6LTBFLTVcZ8/s1600/precision.jpg"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 400px; height: 59px;" src="http://1.bp.blogspot.com/-biNeDw1sVyw/TWbKI9_ljSI/AAAAAAAAABQ/6LTBFLTVcZ8/s400/precision.jpg" alt="" id="BLOGGER_PHOTO_ID_5577367443973967138" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;Recall or sensitivity is a measure of the ability of a system to present all relevant instances, so it is used for evaluating the completeness of results.&lt;br /&gt;&lt;br /&gt;&lt;span style="font-size:130%;"&gt;&lt;span style="font-size:130%;"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-HwMeN6YwsOg/TWbKVCmb4SI/AAAAAAAAABY/e4u46tpmAHE/s1600/recall.jpg"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 400px; height: 54px;" src="http://4.bp.blogspot.com/-HwMeN6YwsOg/TWbKVCmb4SI/AAAAAAAAABY/e4u46tpmAHE/s400/recall.jpg" alt="" id="BLOGGER_PHOTO_ID_5577367651369083170" border="0" /&gt;&lt;/a&gt;&lt;/span&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;It is difficult to compare different systems in terms of precision and recall, as both measures are independent. 
Moreover, when recall increases precision tends to decrease: as more relevant instances are retrieved, the more nonrelevant instances are retrieved.&lt;br /&gt;&lt;span style="font-size:130%;"&gt;&lt;br /&gt;F and Fß measure&lt;/span&gt;:&lt;br /&gt;&lt;a href="http://en.wikipedia.org/wiki/F1_score"&gt;F-measure &lt;/a&gt;considers both precision and recall providing a single measurement for a system avoiding having two independent measures.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="font-size:130%;"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/-1NmLSFpbE2s/TWbJFxQaCPI/AAAAAAAAAA4/HlUPweoy1tI/s1600/F.jpg"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 230px; height: 50px;" src="http://2.bp.blogspot.com/-1NmLSFpbE2s/TWbJFxQaCPI/AAAAAAAAAA4/HlUPweoy1tI/s400/F.jpg" alt="" id="BLOGGER_PHOTO_ID_5577366289503619314" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;In order to give different weights to precision and recall, the F-measure was derived so that Fß measures the effectiveness of retrieval with respect to a user who attaches ß times as much importance to recall as precision.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="font-size:130%;"&gt;&lt;span style="font-size:130%;"&gt;&lt;span style="font-size:130%;"&gt;&lt;span style="font-size:130%;"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-3zzEpUveAII/TWbMVVNRCkI/AAAAAAAAABg/RRvrNkFAv2k/s1600/FB.jpg"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 348px; height: 64px;" src="http://3.bp.blogspot.com/-3zzEpUveAII/TWbMVVNRCkI/AAAAAAAAABg/RRvrNkFAv2k/s400/FB.jpg" alt="" id="BLOGGER_PHOTO_ID_5577369855387044418" border="0" /&gt;&lt;/a&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="font-size:130%;"&gt;Fß measure of a 
confusion matrix&lt;/span&gt;:&lt;br /&gt;For my bachelor thesis I have had to write a function capable to compute the Fß measure for a given confusion matrix. This is the equation that has been derived from the explained measures:&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-i-ZmAMpq47o/TWbJpMBDYAI/AAAAAAAAABI/Y7myz1-e5VY/s1600/mFB.jpg"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 367px; height: 84px;" src="http://4.bp.blogspot.com/-i-ZmAMpq47o/TWbJpMBDYAI/AAAAAAAAABI/Y7myz1-e5VY/s400/mFB.jpg" alt="" id="BLOGGER_PHOTO_ID_5577366897982398466" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;Where "i" represents the class and "N" is the total number of classes.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-1414240347763306272?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/1414240347763306272/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/02/measures-out-of-confusion-matrix.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/1414240347763306272'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/1414240347763306272'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/02/measures-out-of-confusion-matrix.html' title='Measures out of a confusion matrix'/><author><name>Eli Carcel</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail 
xmlns:media='http://search.yahoo.com/mrss/' url='http://1.bp.blogspot.com/-biNeDw1sVyw/TWbKI9_ljSI/AAAAAAAAABQ/6LTBFLTVcZ8/s72-c/precision.jpg' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-6569868873573102414</id><published>2011-02-24T13:12:00.003+01:00</published><updated>2011-03-03T06:21:33.930+01:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='video'/><category scheme='http://www.blogger.com/atom/ns#' term='streaming'/><category scheme='http://www.blogger.com/atom/ns#' term='Marcel'/><title type='text'>HTTP Adaptive Streaming Techniques</title><content type='html'>There is no a clear and standard way to do streaming, but there are &lt;a href="http://david-s-pan.com/2010/08/11/adaptive-http-streaming-solutions/"&gt;a  few techniques&lt;/a&gt; to do it. At this point I'm going to summarize them.&lt;br /&gt;&lt;br /&gt;&lt;span style="font-size:180%;"&gt;Proprietary Techniques:   &lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold;font-size:130%;" &gt;Adobe's Dynamic HTTP streaming&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="font-family:arial;"&gt;It  uses an own protocol called Real Time Message Protocol (RTMP) to put  the fragments into F4F files which are used, in addition to F4M which  carries all the features of the files encoded at multiples bit rates to  do a smooth streaming.&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold;font-size:130%;" &gt;Apple's HTTP live streaming&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="http://3.bp.blogspot.com/-SuJhYXto7A8/TWJT1N9qQ9I/AAAAAAAAABU/Ao_CuGMounY/s1600/HTTPLiveStreaming%2B.png"&gt;&lt;img style="margin: 0pt 10px 10px 0pt; float: left; cursor: pointer; width: 481px; height: 302px;" src="http://3.bp.blogspot.com/-SuJhYXto7A8/TWJT1N9qQ9I/AAAAAAAAABU/Ao_CuGMounY/s400/HTTPLiveStreaming%2B.png" alt="" id="BLOGGER_PHOTO_ID_5576111462384550866" border="0" /&gt;&lt;/a&gt;&lt;span 
style="font-family:arial;"&gt;A  division of the main stream is done in equal length segments and stored  (.ts). An index is also created, which contains a play-list of all the  media divisions and all the meta-data associated. The extension of this  file is .M3U8&lt;/span&gt; &lt;span style="font-family:arial;"&gt;This index is served to the client and thus, it can control all the streaming process.&lt;/span&gt;&lt;br /&gt;There is a &lt;a href="http://gps-tsc.upc.es/imatge/_Xgiro/teaching/thesis/2009-2010/BrunaGirvent/memoria.pdf"&gt;final thesis of Bruna&lt;/a&gt;,  another student of my career, which explains perfectly all the apple's  HTTP live streaming for more information. The document is written in  Catalan.&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold;font-size:130%;" &gt;Microsoft's smooth streaming&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;This method's chunks are contiguous in a mp4 file for random access. There are two different formats:&lt;br /&gt;&lt;ul&gt;&lt;li&gt;.ismv: Contains both audio and video.&lt;/li&gt;&lt;li&gt;.isma: Contains only audio&lt;/li&gt;&lt;/ul&gt;&lt;span style="font-weight: bold;"&gt;Server Manifest File (.ism)&lt;/span&gt; controls the relation between all media tracks, bit rates and files, while &lt;span style="font-weight: bold;"&gt;Client Manifest File&lt;/span&gt; describes all the available streams of the client, the coders that can be used to be after decoded, video resolutions, etc.&lt;br /&gt;Clients just request the fragments by the URI, using the quality and the offset to the next fragment.&lt;br /&gt;&lt;br /&gt;&lt;span style="font-size:180%;"&gt;Open Techniques:&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="font-size:130%;"&gt;&lt;span style="font-weight: bold;"&gt;Adaptive HTTP streaming (AHS)&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;It's a project from 3r&lt;a href="http://2.bp.blogspot.com/-juyvNNw8-64/TV_iKG2ECZI/AAAAAAAAABE/THh6I4EX8SY/s1600/3GPP_logo.gif"&gt;&lt;img style="margin: 0pt 
10px 10px 0pt; float: left; cursor: pointer; width: 204px; height: 121px;" src="http://2.bp.blogspot.com/-juyvNNw8-64/TV_iKG2ECZI/AAAAAAAAABE/THh6I4EX8SY/s320/3GPP_logo.gif" alt="" id="BLOGGER_PHOTO_ID_5575423526971902354" border="0" /&gt;&lt;/a&gt;d Generation Partnership Project (3GPP) which consists in divide the whole stream in chunks.&lt;br /&gt;Usually to codify a frame, video coders use the previous frames, so in this case we should define all the &lt;a href="http://en.wikipedia.org/wiki/Group_of_pictures"&gt;group of pictures&lt;/a&gt;(GOP), starting from an I picture and ending before another one, as a chunk.&lt;br /&gt;Of  course, there is a meta-data file which indicates how to use this  little parts of video, which are each one in one different URI. The name  of this file is Media Presentation Description.(MPD)&lt;br /&gt;This version is the 9th release. You can see the whole standard in the official web page &lt;a href="http://www.3gpp.org/ftp/specs/html-info/26234.htm"&gt;chapter 26.234&lt;br /&gt;&lt;/a&gt;&lt;br /&gt;&lt;a href="http://1.bp.blogspot.com/-FZNyg9H8WgI/TV_hG0PUryI/AAAAAAAAAA0/v_RZFLgKKp8/s1600/Http%2Badaptive%2Bstreaming.jpg"&gt;&lt;br /&gt;&lt;/a&gt;&lt;a href="http://2.bp.blogspot.com/-Hq0giNiBwc4/TV_jvCJEXlI/AAAAAAAAABM/IAodnKxFmVA/s1600/Http%2Badaptive%2Bstreaming.jpg"&gt;&lt;img style="margin: 0px auto 10px; display: block; text-align: center; cursor: pointer; width: 400px; height: 178px;" src="http://2.bp.blogspot.com/-Hq0giNiBwc4/TV_jvCJEXlI/AAAAAAAAABM/IAodnKxFmVA/s400/Http%2Badaptive%2Bstreaming.jpg" alt="" id="BLOGGER_PHOTO_ID_5575425260876226130" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold;font-size:130%;" &gt;Dynamic Adaptive Streaming over HTTP (&lt;a href="https://labs.ericsson.com/apis/streaming-media/documentation#Visual_Loss_Video_Source_-_How_to_make_packet_loss_clearly_visible"&gt;DASH&lt;/a&gt;)&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;The  10th release of 3GPP which is an 
improvement of AHS which includes new  features as well as on-demand streaming video or linear TV including  live media broadcast.&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold;font-size:130%;" &gt;Open IPTV forum's HTTP adaptive streaming&lt;/span&gt;&lt;br /&gt;&lt;a href="http://4.bp.blogspot.com/-Fpdy5JqH0F8/TV_g6Okq-OI/AAAAAAAAAAk/1hq3HMPclPc/s1600/ipf_open_iptv_forum.gif"&gt;&lt;img style="margin: 0pt 10px 10px 0pt; float: left; cursor: pointer; width: 164px; height: 62px;" src="http://4.bp.blogspot.com/-Fpdy5JqH0F8/TV_g6Okq-OI/AAAAAAAAAAk/1hq3HMPclPc/s320/ipf_open_iptv_forum.gif" alt="" id="BLOGGER_PHOTO_ID_5575422154656905442" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;Open IPTV forum has adopted the 3GPP AHS and it's base to add some support to&lt;a href="http://www.slideshare.net/christian.timmerer/http-streaming-of-mpeg-media"&gt; MPEG-2 transport files&lt;/a&gt;.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-6569868873573102414?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/6569868873573102414/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/02/http-adaptive-streaming-techniques.html#comment-form' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/6569868873573102414'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/6569868873573102414'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/02/http-adaptive-streaming-techniques.html' title='HTTP Adaptive Streaming Techniques'/><author><name>Marcel</name><uri>http://www.blogger.com/profile/06357583442533980245</uri><email>noreply@blogger.com</email><gd:image 
rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/-SuJhYXto7A8/TWJT1N9qQ9I/AAAAAAAAABU/Ao_CuGMounY/s72-c/HTTPLiveStreaming%2B.png' height='72' width='72'/><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-1680813283254928012</id><published>2011-02-19T16:16:00.017+01:00</published><updated>2011-03-03T06:21:47.181+01:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='video'/><category scheme='http://www.blogger.com/atom/ns#' term='streaming'/><category scheme='http://www.blogger.com/atom/ns#' term='Marcel'/><title type='text'>Adaptive Streaming over HTTP</title><content type='html'>&lt;span style="font-family:arial;"&gt;How can we watch a video on the internet? There are a few ways to do that. We can just see it by&lt;/span&gt;&lt;span style="font-weight: bold;font-family:arial;" &gt; downloading the whole video&lt;/span&gt;&lt;span style="font-family:arial;"&gt; file and after, playing it.&lt;/span&gt;&lt;br /&gt;&lt;div  style="text-align: justify;font-family:arial;"&gt;&lt;span style="font-family:arial;"&gt;That's not good for the user experience because usually video sizes are not small, and depending on the bandwidth of the connection it can take a lot of time.&lt;/span&gt;&lt;br /&gt;&lt;span style="font-family:arial;"&gt;There is another way called&lt;/span&gt;&lt;span style="font-weight: bold;font-family:arial;" &gt; &lt;a href="http://www.w3.org/2010/11/web-and-tv/papers/webtv2_submission_64.pdf"&gt;progressive download&lt;/a&gt;&lt;/span&gt;&lt;span style="font-family:arial;"&gt;, it consists in a service in which you can download the video and play it at the same time. 
In this method, the video has to be stored in a server, it cannot be streamed by a live web cam.&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;There is a lot of &lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-dZrAllbQHeQ/TV_g3CzmpFI/AAAAAAAAAAc/uYIjyNWgSso/s1600/flickr-3774664719-hd.jpg"&gt;&lt;img style="margin: 0pt 10px 10px 0pt; float: left; cursor: pointer; width: 200px; height: 148px;" src="http://4.bp.blogspot.com/-dZrAllbQHeQ/TV_g3CzmpFI/AAAAAAAAAAc/uYIjyNWgSso/s200/flickr-3774664719-hd.jpg" alt="" id="BLOGGER_PHOTO_ID_5575422099958703186" border="0" /&gt;&lt;/a&gt;discussion about what streaming is, and what is not.&lt;br /&gt;The definition of streaming says that just the needed parts of media are transferred in order to play them. I mean, in progressive download, services like &lt;a href="http://www.youtube.com/"&gt;&lt;span style="font-style: italic;"&gt;youtube&lt;/span&gt;&lt;/a&gt;, when you pause the video to do other things, this video is still downloading and wasting valuable bandwidth&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;In addition, returning to the title, it says adaptive. Why &lt;a href="https://labs.ericsson.com/apis/streaming-media/documentation#Visual_Loss_Video_Source_-_How_to_make_packet_loss_clearly_visible"&gt;adaptive&lt;/a&gt;?&lt;br /&gt;Imagine that you are doing streaming with a web cam. 
Suddenly, the net becomes more and more used, and your video is just being played slower because of the limitation of the net's bandwidth.&lt;br /&gt;Adaptive then, means that the &lt;a href="http://en.wikipedia.org/wiki/Scalable_Video_Coding"&gt;scalability&lt;/a&gt; of the video can change depending on the bandwidth available and affecting the spatial (pixels per unit of width), temporal (frames per second), quality &lt;a href="http://en.wikipedia.org/wiki/Discrete_cosine_transform"&gt;(DCT Coefficients&lt;/a&gt; and module of &lt;a href="http://en.wikipedia.org/wiki/Motion_vector"&gt;Motion Vectors&lt;/a&gt;), or combinations of them.&lt;br /&gt;This way, if you were watching a video and the bandwidth were variable, playback would be smoother than before.&lt;br /&gt;&lt;br /&gt;The other interesting issue is &lt;span style="font-weight: bold;"&gt;why to use HTTP&lt;/span&gt;.  HTTP is the protocol used to serve web pages, but there are some reasons that make it suitable for sending video through it.&lt;br /&gt;&lt;/div&gt;&lt;ul style="text-align: justify; font-family: arial;"&gt;&lt;li&gt;Easy and effortless streaming by avoiding NAT and firewall issues.&lt;/li&gt;&lt;li&gt;There is a clear trend in the web to send video through HTTP. All &lt;a href="http://www.apple.com/"&gt;Apple's devices&lt;/a&gt;, and mobile devices in general, YouTube and more web streaming services.&lt;br /&gt;&lt;/li&gt;&lt;li&gt;It has the ability to give the control of the streaming to the client.&lt;/li&gt;&lt;/ul&gt;&lt;br /&gt;Seeing that, although there are no standards yet, some companies and entities have defined some ways to do streaming over HTTP. 
Here in the Bitsearch, you can see &lt;a href="http://bitsearch.blogspot.com/2011/02/http-adaptive-streaming-techniques.html"&gt;another post&lt;/a&gt; with some of the most important technologies.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-1680813283254928012?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/1680813283254928012/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/02/adaptive-streaming-over-http.html#comment-form' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/1680813283254928012'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/1680813283254928012'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/02/adaptive-streaming-over-http.html' title='Adaptive Streaming over HTTP'/><author><name>Marcel</name><uri>http://www.blogger.com/profile/06357583442533980245</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/-dZrAllbQHeQ/TV_g3CzmpFI/AAAAAAAAAAc/uYIjyNWgSso/s72-c/flickr-3774664719-hd.jpg' height='72' width='72'/><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-4301555156898446379</id><published>2011-02-14T15:45:00.015+01:00</published><updated>2011-02-15T15:44:07.634+01:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='video'/><category scheme='http://www.blogger.com/atom/ns#' term='streaming'/><category 
scheme='http://www.blogger.com/atom/ns#' term='Marcel'/><category scheme='http://www.blogger.com/atom/ns#' term='html'/><title type='text'>Open source software for video streaming on the web</title><content type='html'>&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-1o2H2ToxW3g/TVqQeP32RTI/AAAAAAAAAEU/sVrQgEl_83E/s1600/MainGoalGraph.jpg"&gt;&lt;br /&gt;&lt;/a&gt;&lt;br /&gt;In this blog post I would like to explain what is my final thesis about, and three possible actual technologies to succeed in.&lt;br /&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;It's about how to demonstrate the algorithms of the &lt;a href="http://gps-tsc.upc.es/imatge/"&gt;image processing  group&lt;/a&gt; in a web interface, with the most recent technologies such as HTML 5.&lt;br /&gt;&lt;br /&gt;To stream the video to a web page, we need a streaming engine. This, have to be able to stream the video, so we have to be aware about wheather the most common browsers support all this technologies.&lt;br /&gt;&lt;br /&gt;I think that with a little figure is easier to understand:&lt;br /&gt;&lt;br /&gt;&lt;a href="http://3.bp.blogspot.com/-1o2H2ToxW3g/TVqQeP32RTI/AAAAAAAAAEU/sVrQgEl_83E/s1600/MainGoalGraph.jpg"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 400px; height: 121px;" src="http://3.bp.blogspot.com/-1o2H2ToxW3g/TVqQeP32RTI/AAAAAAAAAEU/sVrQgEl_83E/s400/MainGoalGraph.jpg" alt="" id="BLOGGER_PHOTO_ID_5573926338155660594" border="0" /&gt;&lt;/a&gt;Where we can see the source as the output of some video processing.&lt;br /&gt;&lt;br /&gt;It's clear that all browsers accept HTTP protocol because all pages are served of this way.&lt;br /&gt;And other advantage that HTTP protocol has between other protocols like UDP or RTP, apart from the support for technologies, is that firewalls usually don't accept this type of traffic.&lt;br /&gt;&lt;br /&gt;Focusing in the streaming 
engine, there are three possible technologies to cover this area:&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold;font-size:130%;" &gt;Gstreamer:&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/-gWYUY68oXqM/TVlF-YGhNFI/AAAAAAAAADU/3QM2P2PmRQ4/s1600/gstreamer.png"&gt;&lt;img style="margin: 0pt 10px 10px 0pt; float: left; cursor: pointer; width: 320px; height: 54px;" src="http://2.bp.blogspot.com/-gWYUY68oXqM/TVlF-YGhNFI/AAAAAAAAADU/3QM2P2PmRQ4/s320/gstreamer.png" alt="" id="BLOGGER_PHOTO_ID_5573562951771698258" border="0" /&gt;&lt;/a&gt;Gstreamer is an open source low-level library for multimedia streaming purposes which is programmed in C. It's a very well designed library with a plug-ins structure which allows you to use a lot of functionalities. There is also an example of  software called "&lt;a href="http://www.flumotion.net/"&gt;Flumotion&lt;/a&gt;" from the company “Fluendo”  for HTTP streaming. 
This software could fulfill these requirements.&lt;br /&gt;&lt;br /&gt;Fluendo has one of its headquarters here, in Barcelona.&lt;br /&gt;&lt;br /&gt;A few days ago I wrote an article explaining&lt;a href="http://bitsearch.blogspot.com/2011/02/gstreamer-libraries-proposed-for.html"&gt; the basics of gstreamer&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;&lt;span style="font-size:130%;"&gt; &lt;span style="font-weight: bold;"&gt;FFmpeg:&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-VZCVJ1QTy5w/TVlGJAYrwKI/AAAAAAAAADc/KIFI70EE-Sc/s1600/images.jpeg"&gt;&lt;img style="margin: 0pt 10px 10px 0pt; float: left; cursor: pointer; width: 267px; height: 65px;" src="http://3.bp.blogspot.com/-VZCVJ1QTy5w/TVlGJAYrwKI/AAAAAAAAADc/KIFI70EE-Sc/s320/images.jpeg" alt="" id="BLOGGER_PHOTO_ID_5573563134383997090" border="0" /&gt;&lt;/a&gt;&lt;a href="http://www.ffmpeg.org/"&gt;FFmpeg&lt;/a&gt; is also a complete solution for the manipulation of audiovisual content. This includes a lot of functionalities such as encoding, decoding, recording, audio and video streaming, etc.&lt;br /&gt;Some Gstreamer plug-ins are built on ffmpeg functionalities. In terms of my project, FFmpeg is less recommended because there are too many changes between different versions of this  library. 
It could be the weakest point of this technology.&lt;br /&gt;&lt;br /&gt;&lt;span style="font-size:130%;"&gt; &lt;span style="font-weight: bold;"&gt;VLC media player&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/-lrKt7DhcGjE/TVlGSEvOiCI/AAAAAAAAADk/zRVnkmNzURQ/s1600/vlc_box_full.jpg"&gt;&lt;img style="margin: 0pt 10px 10px 0pt; float: left; cursor: pointer; width: 218px; height: 221px;" src="http://2.bp.blogspot.com/-lrKt7DhcGjE/TVlGSEvOiCI/AAAAAAAAADk/zRVnkmNzURQ/s320/vlc_box_full.jpg" alt="" id="BLOGGER_PHOTO_ID_5573563290171115554" border="0" /&gt;&lt;/a&gt;&lt;a href="http://www.videolan.org/"&gt;VLC&lt;/a&gt; media player is an open-source project for playing audiovisual content. It also has a streaming server which could be useful to stream to the web. VLC is programmed in C/C++ and the most interesting library for us is &lt;a href="http://www.live555.com/liveMedia/"&gt;liveMedia&lt;/a&gt;, which is a set of C++ libraries and frameworks for multimedia streaming (RTP/RTCP, RTSP, SIP) . 
It has also some functionalities taken from ffmpeg.&lt;br /&gt;&lt;br /&gt;We also have to be conscious about the compatibility of the browsers, the most important ones, at least.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Here there is a comparison of the usage of browsers during January of 2011.&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-6GLJwj4V2ow/TVlK9wScrrI/AAAAAAAAAEE/AlRC1XjQoOs/s1600/600px-Web_browser_usage_share.svg.png"&gt;&lt;img style="margin: 0px auto 10px; display: block; text-align: center; cursor: pointer; width: 320px; height: 320px;" src="http://3.bp.blogspot.com/-6GLJwj4V2ow/TVlK9wScrrI/AAAAAAAAAEE/AlRC1XjQoOs/s320/600px-Web_browser_usage_share.svg.png" alt="" id="BLOGGER_PHOTO_ID_5573568438642454194" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;But HTML 5 is not yet implemented in all the browsers, only in some of them.&lt;br /&gt;&lt;br /&gt;Here, there is another figure with the &lt;a href="http://bitsearch.blogspot.com/2009/08/about-html-5.html"&gt;HTML 5&lt;/a&gt; functionalities implemented in  the most updated version of each browser. My colleague Christian wrote &lt;a href="http://bitsearch.blogspot.com/2009/08/about-html-5.html"&gt;an article&lt;/a&gt; about  HTML  5 explaining what it is and what new features it includes. We have to look at the green one, which is support for the “video” tag which is able to handle a video stream into a web page. 
As you can see, In Chrome 4.1, Safari, Opera 10 and Firefox 3.6 this functionality is already implemented.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-B3beu2pEEnk/TVlGzdBiYcI/AAAAAAAAADs/vXibheHbDps/s1600/html5browsercomparison.jpg"&gt;&lt;img style="margin: 0px auto 10px; display: block; text-align: center; cursor: pointer; width: 400px; height: 316px;" src="http://3.bp.blogspot.com/-B3beu2pEEnk/TVlGzdBiYcI/AAAAAAAAADs/vXibheHbDps/s400/html5browsercomparison.jpg" alt="" id="BLOGGER_PHOTO_ID_5573563863626047938" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-4301555156898446379?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/4301555156898446379/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/02/displaying-image-processing-group.html#comment-form' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/4301555156898446379'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/4301555156898446379'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/02/displaying-image-processing-group.html' title='Open source software for video streaming on the web'/><author><name>Marcel</name><uri>http://www.blogger.com/profile/17299945445492449290</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' 
url='http://3.bp.blogspot.com/-1o2H2ToxW3g/TVqQeP32RTI/AAAAAAAAAEU/sVrQgEl_83E/s72-c/MainGoalGraph.jpg' height='72' width='72'/><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-4710800177617150340</id><published>2011-02-11T13:15:00.008+01:00</published><updated>2011-02-11T13:42:21.565+01:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='retrieval'/><category scheme='http://www.blogger.com/atom/ns#' term='xavi'/><category scheme='http://www.blogger.com/atom/ns#' term='descriptors'/><title type='text'>Threshold Optimization for MPEG-7 Dominant Color Similarity in YCbCr</title><content type='html'>&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-Go-40JuBQSI/TVUtE7Ice3I/AAAAAAAAAfI/uJDbtwg0FS8/s1600/42.png"&gt;&lt;br /&gt;&lt;/a&gt;These last days I have been adapting a C++ implementation of the &lt;a href="http://mpeg.chiariglione.org/standards/mpeg-7/mpeg-7.htm#E12E22"&gt;MPEG-7 Dominant Color &lt;/a&gt;Visual Descriptor to be used in my C libraries. This visual descriptor describes a region as a maximum of eight colors, providing for each of them their mean, variance (optional) and percentage of occupation. 
The extraction of the descriptor is performed with the software developed in &lt;a href="http://bitsearch.blogspot.com/2010/03/brand-new-mpeg-7-search-engine-based-on.html"&gt;Carles Ventura's thesis&lt;/a&gt;, while the similarity assessment is part of my Phd work in C.&lt;br /&gt;&lt;br /&gt;The first problem I had was the mismatch between the color space used for the MPE&lt;a href="http://4.bp.blogspot.com/-gwcz-pg1WHo/TVUulzp5TYI/AAAAAAAAAfQ/v8dfXRFiQyo/s1600/600px-YCbCr-CbCr_Scaled_Y50.png"&gt;&lt;img style="float: right; margin: 0pt 0pt 10px 10px; cursor: pointer; width: 200px; height: 200px;" src="http://4.bp.blogspot.com/-gwcz-pg1WHo/TVUulzp5TYI/AAAAAAAAAfQ/v8dfXRFiQyo/s200/600px-YCbCr-CbCr_Scaled_Y50.png" alt="" id="BLOGGER_PHOTO_ID_5572411340996824450" border="0" /&gt;&lt;/a&gt;G-7/XML files generated from the C++ library with the color space proposed by the standard to assess similarity. These files express the dominant colors in the RGB color space, while the standard metric is based on the &lt;a href="http://en.wikipedia.org/wiki/CIELUV"&gt;CIE LUV&lt;/a&gt; color space. Moreover, my C libraries do not include any RGB to CIE Luv conversion, so I decided to approximate it to &lt;a href="http://en.wikipedia.org/wiki/YCbCr"&gt;YCbCr&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;A second problem I was not aware of was the quantized values of the descriptor fields in the XML document. After checking the documentation and with the help of Carles, I figure out how to correctly recover the percentage values, which were the ones that alerted me of the problem as they were not adding up to 1.0.&lt;br /&gt;&lt;br /&gt;Once these implementation issues were solved, I started performing some visual tests from &lt;a href="http://gps-tsc.upc.es/imatge/i3media/gos/"&gt;GOS&lt;/a&gt;, the graphical interface we have developed for visual search. I realized that the results were not very good. 
Check for exemple this result I obtained when querying the database with the same image which is retrieved as the best result:&lt;br /&gt;&lt;br /&gt;&lt;a href="http://1.bp.blogspot.com/-LbwpxoTh5UI/TVUq-IQpbaI/AAAAAAAAAe4/3_B9BHdFD1U/s1600/16.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 400px; height: 171px;" src="http://1.bp.blogspot.com/-LbwpxoTh5UI/TVUq-IQpbaI/AAAAAAAAAe4/3_B9BHdFD1U/s400/16.png" alt="" id="BLOGGER_PHOTO_ID_5572407360798420386" border="0" /&gt;&lt;/a&gt;Clearly the results could be much better, as only the first six retrieved images include the anchor, while the database contains eleven of these type of keyframes. So I dove into the source code to debug it step by step and realized that the main problem was coming from a parameter heuristically set in the similarity metric. The MPEG-7 standard proposes a value Td=16.0 as a threshold to consider if two dominant colors are similar enough to be considered "similar". This threshold is applied on the euclidean distance between the two colors which are compared. After some debug, I realized that the turquoise color of the anchor's coat from two different images was considered as no similar by the metric due to this threshold. So I tried to increase this threshold to 32.0, with the following result:&lt;br /&gt;&lt;br /&gt;&lt;a href="http://4.bp.blogspot.com/-1bvnnwUH2Gs/TVUsVeO36RI/AAAAAAAAAfA/vZV0aGeZnSk/s1600/32.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 400px; height: 178px;" src="http://4.bp.blogspot.com/-1bvnnwUH2Gs/TVUsVeO36RI/AAAAAAAAAfA/vZV0aGeZnSk/s400/32.png" alt="" id="BLOGGER_PHOTO_ID_5572408861345179922" border="0" /&gt;&lt;/a&gt;The new configuration retrieved much better results. Maybe the fact that I am using the YCbCr color space instead of the recommended  CIE Luv space is the reason why the threshold 32.0 is doing much better than the proposed 16.0. 
In any case, I run some more values for the same experiment and finally estimated that the most appealing visual results are obtained with Td=42.0, as shown in the figure below.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://3.bp.blogspot.com/-Go-40JuBQSI/TVUtE7Ice3I/AAAAAAAAAfI/uJDbtwg0FS8/s1600/42.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 400px; height: 175px;" src="http://3.bp.blogspot.com/-Go-40JuBQSI/TVUtE7Ice3I/AAAAAAAAAfI/uJDbtwg0FS8/s400/42.png" alt="" id="BLOGGER_PHOTO_ID_5572409676556696434" border="0" /&gt;&lt;/a&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-4710800177617150340?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/4710800177617150340/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/02/threshold-for-mpeg-7-dominant-color.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/4710800177617150340'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/4710800177617150340'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/02/threshold-for-mpeg-7-dominant-color.html' title='Threshold Optimization for MPEG-7 Dominant Color Similarity in YCbCr'/><author><name>Xavi Giró-i-Nieto</name><uri>https://profiles.google.com/110271653716466055491</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh5.googleusercontent.com/--EaBZ1Bzzsg/AAAAAAAAAAI/AAAAAAAAAwU/4J6xIhZ3Xyo/s512-c/photo.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' 
url='http://4.bp.blogspot.com/-gwcz-pg1WHo/TVUulzp5TYI/AAAAAAAAAfQ/v8dfXRFiQyo/s72-c/600px-YCbCr-CbCr_Scaled_Y50.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-4624015850095385152</id><published>2011-02-10T18:49:00.007+01:00</published><updated>2011-03-01T16:24:28.058+01:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Manel'/><category scheme='http://www.blogger.com/atom/ns#' term='classification'/><category scheme='http://www.blogger.com/atom/ns#' term='webservices'/><title type='text'>Web architecture of a classification system</title><content type='html'>&lt;div style="text-align: center;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;In the context of the i3media project we are close to complete the automatic image annotation system line of work. At this stage of the project we have all the necessary tools to publish the web service and to be used by the users. This paper is intended to lay the foundations of the service architecture and thus to clarify all the possibilities that it provides.&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;b&gt;&lt;i&gt;Model training&lt;/i&gt;&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;A model contains the parameters necessary for a classifier to decide whether an image belongs or not to a certain class. 
These models are built by training a classifier by a set of images which have been previously annotated as belonging or not to the class that is being modeled.&lt;br /&gt;&lt;br /&gt;The modeled classes are intended to represent semantic concepts which are to be automatically detected by the system. These semantic classes are defined in an ontology. The implemented system will deal with several ontologies, each of them containing a set of classes that can be found in the same domain. The users of the system must be able to define these domains and the modeled classes.&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;br /&gt;&lt;/span&gt;A user who wants to define a model must make the manual annotation of a set of image representing and not representing the class. This manual annotation will be saved in the server to be able to reuse these annotations in case the model wants to be retrained with new (less) samples. It should be noted that an image can belong to several classes annotations.&lt;br /&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;span class="Apple-style-span" style="color: rgb(0, 0, 238);font-family:Georgia,serif;" &gt;&lt;img src="http://3.bp.blogspot.com/-qWMp72vr9Ck/TVU9PpXNIpI/AAAAAAAAAIY/p9j9yv6fLM4/s320/BLOG01.PNG" alt="" id="BLOGGER_PHOTO_ID_5572427452951372434" style="margin: 0px auto 10px; display: block; text-align: center; cursor: pointer; width: 320px; height: 195px;" border="0" /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;div style="text-align: justify;"&gt;The annotations are used by the model builder to train the classifier and build the model. As a result, a model can be generated for every class.&lt;br /&gt;&lt;br /&gt;At this point, it is also possible to estimate the performance of the classifier by dividing the train set in two: a smaller train set and a test set. 
Evaluation measures such as precision, recall or f-measure can be estimated through cross validation and, by doing so, provide the user with a measure of the expected quality of the automatic annotations. This technique was widely developed through &lt;a href="http://bitsearch.blogspot.com/search/label/Mireia"&gt;Mireia's thesis&lt;/a&gt;.&lt;br /&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;b&gt;&lt;i&gt;Image Classification&lt;/i&gt;&lt;/b&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span"&gt;Natural user of the service will want to detect and automatically annotate an image. He will select a file system image, the class id of the class he wants to detect (equal to 0 if he wants to classify with all classes in the ontology)  and the ontology (model) to be used. Once on the server that info will be passed by the classification engine with the corresponding models.&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span"&gt;The client response is a positive annotation with the detection confidence score of the class (the client can set a minimum score to be sent).  
At this point the user has the possibility of using this annotation to train the classifier or modify it if necessary.&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span"&gt;&lt;span class="Apple-style-span" style="color: rgb(0, 0, 238);font-family:Georgia,serif;" &gt;&lt;img src="http://3.bp.blogspot.com/-0z7sD-IfTeA/TVU9j5zcOwI/AAAAAAAAAIg/4aVN_BoxVMU/s320/BLOG02.png" alt="" id="BLOGGER_PHOTO_ID_5572427800962153218" style="margin: 0px auto 10px; display: block; text-align: center; cursor: pointer; width: 320px; height: 195px;" border="0" /&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span"&gt;This feedback and self-learning system models allow an improvement of efficiency of the classifier engine with the collaboration of multiple users. However, we must manage somehow this update because if the classifier is working properly will not need to be updated with new redundant annotations.&lt;/span&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-4624015850095385152?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/4624015850095385152/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/02/web-architecture-of-classification.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/4624015850095385152'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/4624015850095385152'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/02/web-architecture-of-classification.html' title='Web architecture of a classification 
system'/><author><name>Manel</name><uri>http://www.blogger.com/profile/13861846623917028915</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/-qWMp72vr9Ck/TVU9PpXNIpI/AAAAAAAAAIY/p9j9yv6fLM4/s72-c/BLOG01.PNG' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-543911529766021170</id><published>2011-02-08T14:20:00.009+01:00</published><updated>2011-02-08T15:24:59.951+01:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Eli'/><category scheme='http://www.blogger.com/atom/ns#' term='supervised learning'/><category scheme='http://www.blogger.com/atom/ns#' term='classification'/><title type='text'>Supervised, unsupervised and semi-supervised learning</title><content type='html'>&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style=";font-family:Times;font-size:medium;"  &gt;&lt;span style=";font-family:arial;font-size:85%;"  &gt;In this post I will discuss the pros and cons of some machine learning procedures and how would they suit shot type detection at the &lt;a href="http://www.ccma.cat/"&gt;CCMA&lt;/a&gt; (Catalan Broadcasting Corporation). I will focus on three main categories: supervised, unsupervised and semi-supervised learning.&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style=";font-family:Times;font-size:medium;"  &gt;&lt;span style=";font-family:arial;font-size:85%;"  &gt;The difference between these types of learning is whether the training data has been hand-labeled or not to generate the classifier’s output.&lt;a href="http://en.wikipedia.org/wiki/Supervised_learning"&gt; Supervised learning&lt;/a&gt; assumes that a set of training data has been provided. 
On the other hand, &lt;a href="http://en.wikipedia.org/wiki/Unsupervised_learning"&gt;unsupervised learning&lt;/a&gt; has no labeled data.  A combination of both labeled and unlabeled data results in &lt;a href="http://en.wikipedia.org/wiki/Semi-supervised_learning"&gt;semi-supervised learning&lt;/a&gt;.&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;/div&gt;&lt;span class="Apple-style-span" style=";font-family:Times;font-size:medium;"  &gt;&lt;br /&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style=";font-family:Times;font-size:medium;"  &gt;&lt;div style="margin: 0px; color: rgb(0, 0, 0);"&gt;&lt;span class="Apple-style-span"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;a href="http://1.bp.blogspot.com/_IYWQhgQGEH4/TVFDb2zemtI/AAAAAAAAAAw/DESLoNlwvjI/s1600/TRAINING%2BDATA.jpg" style="text-decoration: none;"&gt;&lt;img src="http://1.bp.blogspot.com/_IYWQhgQGEH4/TVFDb2zemtI/AAAAAAAAAAw/DESLoNlwvjI/s400/TRAINING%2BDATA.jpg" alt="" id="BLOGGER_PHOTO_ID_5571308359880973010" style="margin: 0px auto 10px; display: block; text-align: center; cursor: pointer; width: 400px; height: 188px;" border="0" /&gt;&lt;/a&gt;&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style=";font-family:Times;font-size:medium;"  &gt;&lt;span style="font-weight: bold;font-family:arial;" &gt;&lt;br /&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style=";font-family:Times;font-size:medium;"  &gt;&lt;span style="font-weight: bold;font-family:arial;" &gt;&lt;span style="font-size:85%;"&gt;What does that imply?&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style=";font-family:Times;font-size:medium;"  &gt;&lt;span style=";font-family:arial;font-size:85%;"  &gt;In supervised learning, a qualified person would properly label by hand the set of instances to be used for training. 
This means professionals at the documentation department would be required for this task. The more labeled instances the classifier gets the more precise will be the output, so the amount of hand labeling needed to reach a desired quality &lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style=";font-family:Times;font-size:medium;"  &gt;&lt;span style=";font-family:arial;font-size:85%;"  &gt;is certainly time-consuming &lt;/span&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style=";font-family:Times;font-size:medium;"  &gt;&lt;span style=";font-family:arial;font-size:85%;"  &gt; The advantage is that the system can be controlled better: shot types can be defined and it can be selected which images are of interest. For example, pictures with no people can be avoided by labeling them as “not relevant shot type” so not relevant shot type pictures will not be shown on a search.&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style=";font-family:Times;font-size:medium;"  &gt;&lt;span style=";font-family:arial;font-size:85%;"  &gt;Unsupervised learning, having no labeled data, attempts to find out similar patterns in the data to determine the output. This type of learning needs nobody for the training process but this also means no interactivity: the system has no clue on which shot types are of CCMA’s interest and will define different classes &lt;a href="http://en.wikipedia.org/wiki/Clustering_algorithm"&gt;clustering&lt;/a&gt; similar data depending on the visual content. 
This kind of learning wouldn’t suit the CCMA needs as different shot types have to be defined depending on the context: a soccer close-up shot would have a different name than a parliament close-up shot.&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style=";font-family:Times;font-size:medium;"  &gt;&lt;span style=";font-family:arial;font-size:85%;"  &gt;Finally, semi-supervised learning is actually a supervised method that avoids labeling a large number of instances. This is done by using some of the labeled data to help the classifier label the unlabeled data. Then, this automatically labeled data is also used by the training process. Another supervised method that helps mining labeled data is called &lt;a href="http://en.wikipedia.org/wiki/Active_learning_%28machine_learning%29"&gt;active learning.&lt;/a&gt; Basically, it decides which data should be labeled to improve the classifier performance with less data. These two options are really interesting, as they have the benefits of both supervised and unsupervised learning: interactivity and taking advantage of unlabeled data. With a few labeled instances and the great amount of unlabeled images at our disposal this system could perform well for shot type detection. 
&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-543911529766021170?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/543911529766021170/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/02/supervised-unsupervised-and-semi.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/543911529766021170'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/543911529766021170'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/02/supervised-unsupervised-and-semi.html' title='Supervised, unsupervised and semi-supervised learning'/><author><name>Eli Carcel</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://1.bp.blogspot.com/_IYWQhgQGEH4/TVFDb2zemtI/AAAAAAAAAAw/DESLoNlwvjI/s72-c/TRAINING%2BDATA.jpg' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-1436573858680197395</id><published>2011-02-01T18:19:00.007+01:00</published><updated>2011-02-15T16:02:22.820+01:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='streaming'/><category scheme='http://www.blogger.com/atom/ns#' term='Marcel'/><title type='text'>Introduction to the Gstreamer System Architecture</title><content type='html'>&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" 
href="http://ioshoy.com/wp-content/uploads/vlc.png"&gt;&lt;img style="margin: 0pt 0pt 10px 10px; float: right; cursor: pointer; width: 165px; height: 165px;" src="http://ioshoy.com/wp-content/uploads/vlc.png" alt="" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;In &lt;a href="http://bitsearch.blogspot.com/2010/11/streaming-with-vlc-directly-to-web.html"&gt;my last post&lt;/a&gt; I wrote some different things about &lt;a href="http://www.videolan.org/"&gt;VLC&lt;/a&gt;, which I wanted to use for streaming purposes. I had to investigate what library it used. Actually, there is one library called  "&lt;a href="http://www.live555.com/liveMedia/"&gt;LIVE 555 streaming media&lt;/a&gt;" which the cone, VLC, is using.&lt;br /&gt;Really, it was very difficult for me to find information for getting started.&lt;br /&gt;&lt;br /&gt;For this reason, it was proposed to me, to investigate a bit about another low-level library called &lt;a href="http://gstreamer.freedesktop.org/"&gt;Gstreamer&lt;/a&gt; and written in C, for streaming and playing multimedia, which is used in several applications.&lt;br /&gt;&lt;br /&gt;The first impression was really good. 
There is a &lt;a href="http://gstreamer.freedesktop.org/data/doc/gstreamer/head/manual/html/index.html"&gt;first manual&lt;/a&gt; in which you can learn the basic features of Gstreamer, how to initialize it, how to put it in different states, what is a pipeline, what is an element, and finally an OGG player, as a "Hello World" application to apply the concepts already learnt.&lt;br /&gt;&lt;br /&gt;As a result of my investigation, I'm going to summarize what I've learnt in this Gstreamer Manual.&lt;br /&gt;&lt;br /&gt;The main use of Gstreamer is to manage the video and audio stream to create, for example, a video player, or a streaming server.&lt;br /&gt;&lt;br /&gt;&lt;div style="text-align: left;"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://longomatch.ylatuya.es/image/gstreamer-logo.png"&gt;&lt;img style="margin: 0pt 10px 10px 0pt; float: left; cursor: pointer; width: 403px; height: 117px;" src="http://longomatch.ylatuya.es/image/gstreamer-logo.png" alt="" border="0" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;An&lt;span style="font-weight: bold;"&gt; element&lt;/span&gt; is the most important class of objects in Gstreamer. Usually, a functionality is achieved by linking elements.&lt;br /&gt;&lt;br /&gt;Normally, inside the elements, there are &lt;span style="font-weight: bold;"&gt;pads&lt;/span&gt;, which are inputs and outputs. The input pad is called "&lt;span style="font-weight: bold;"&gt;sink&lt;/span&gt;" and is where data flows into the element, for example, from a local file, or from another element. 
The output pad is called "&lt;span style="font-weight: bold;"&gt;source&lt;/span&gt;".&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://gstreamer.freedesktop.org/data/doc/gstreamer/head/manual/html/images/filter-element.png"&gt;&lt;img style="margin: 0pt 10px 10px 0pt; float: left; cursor: pointer; width: 155px; height: 101px;" src="http://gstreamer.freedesktop.org/data/doc/gstreamer/head/manual/html/images/filter-element.png" alt="" border="0" /&gt;&lt;/a&gt;Here we can see an example of an element with two pads. An input pad, and an output pad.&lt;br /&gt;&lt;div style="text-align: justify;"&gt;This is an element called&lt;span style="font-style: italic;"&gt; filter&lt;/span&gt;, with input and output pads.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Besides elements, there is a kind of container called &lt;span style="font-weight: bold;"&gt;bin&lt;/span&gt;, which would be the parent of some elements. And then, there is a type of bin, called &lt;span style="font-weight: bold;"&gt;pipeline&lt;/span&gt;.&lt;br /&gt;This is a hierarchical structure for one important reason. A medium-sized project with Gstreamer will contain a lot of elements. Imagine that you want to set the "play" state. You would have to tell each element. 
This method allows you to tell the command to the pipeline and it will advise all its elements.&lt;br /&gt;&lt;br /&gt;There are also buses and message reporting functionalities which I'm not going to explain in this post because it would be too long.&lt;br /&gt;&lt;br /&gt;Finally, as an example of what was explained above, we have the first "&lt;a href="http://gstreamer.freedesktop.org/data/doc/gstreamer/head/manual/html/chapter-helloworld.html"&gt;hello world" application&lt;/a&gt;, which is an audio player of the &lt;a href="http://www.vorbis.com/"&gt;Vorbis&lt;/a&gt; encoded content in an&lt;a href="http://www.xiph.org/ogg/"&gt; OGG&lt;/a&gt; file.&lt;br /&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: left;"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://gstreamer.freedesktop.org/data/doc/gstreamer/head/manual/html/images/hello-world.png"&gt;&lt;img style="margin: 0px auto 10px; display: block; text-align: center; cursor: pointer; width: 496px; height: 93px;" src="http://gstreamer.freedesktop.org/data/doc/gstreamer/head/manual/html/images/hello-world.png" alt="" border="0" /&gt;&lt;/a&gt;&lt;/div&gt;In the next posts I'm going to explain how it functions by putting snippets of code, and extending a little bit more the bus and message functionalities.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-1436573858680197395?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/1436573858680197395/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/02/gstreamer-libraries-proposed-for.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/1436573858680197395'/><link rel='self' 
type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/1436573858680197395'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/02/gstreamer-libraries-proposed-for.html' title='Introduction to the Gstreamer System Architecture'/><author><name>Marcel</name><uri>http://www.blogger.com/profile/17299945445492449290</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-5190360837502703806</id><published>2011-01-31T15:16:00.021+01:00</published><updated>2011-04-28T10:43:55.342+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Monica'/><category scheme='http://www.blogger.com/atom/ns#' term='text'/><category scheme='http://www.blogger.com/atom/ns#' term='retrieval'/><title type='text'>The vector space model for scoring</title><content type='html'>&lt;div align="justify"&gt;In my &lt;a href="http://bitsearch.blogspot.com/2011/01/ranking-documents-based-on-tf-idf.html"&gt;last post &lt;/a&gt;I developed the notion of a document vector that captures the relative importance of the terms in a document. 
The representation of a set of documents as vectors in a common vector space is known as the &lt;strong&gt;&lt;a href="http://en.wikipedia.org/wiki/Vector_space_model"&gt;vector space model&lt;/a&gt;&lt;/strong&gt; (or term vector model).&lt;br /&gt;&lt;br /&gt;&lt;a href="http://3.bp.blogspot.com/_tOOi3R89e74/TUeyueig7ZI/AAAAAAAAAJQ/QHL-VLEWook/s1600/vector_space.png"&gt;&lt;img style="MARGIN: 0px 0px 10px 10px; WIDTH: 242px; FLOAT: right; HEIGHT: 170px" id="BLOGGER_PHOTO_ID_5568615975808462226" alt="" src="http://3.bp.blogspot.com/_tOOi3R89e74/TUeyueig7ZI/AAAAAAAAAJQ/QHL-VLEWook/s320/vector_space.png" border="0" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div align="justify"&gt;&lt;/div&gt;&lt;a href="http://3.bp.blogspot.com/_tOOi3R89e74/TUeyueig7ZI/AAAAAAAAAJQ/QHL-VLEWook/s1600/vector_space.png"&gt;&lt;/a&gt;&lt;div align="justify"&gt;Consider the vector derived from a document with one component in the vector for each dictionary term. The set of documents in a collection then may be viewed as a set of vectors in a vector space, in which there is one axis for each term. This representation loses the relative ordering of the terms in each document. 
This type of feature vector is the basis for the &lt;a href="http://en.wikipedia.org/wiki/Bag_of_words"&gt;bag of words&lt;/a&gt; models.&lt;br /&gt;&lt;br /&gt;&lt;/div&gt;&lt;a href="http://3.bp.blogspot.com/_tOOi3R89e74/TUeyueig7ZI/AAAAAAAAAJQ/QHL-VLEWook/s1600/vector_space.png"&gt;&lt;/a&gt;&lt;div align="justify"&gt;&lt;strong&gt;&lt;span style="color:#33ccff;"&gt;How to quantify the similarity between two documents in this &lt;em&gt;vector space&lt;/em&gt;?&lt;/span&gt;&lt;/strong&gt;&lt;/div&gt;&lt;div align="justify"&gt; &lt;/div&gt;&lt;div align="justify"&gt;&lt;strong&gt;&lt;span style="color:#33ccff;"&gt;&lt;/span&gt;&lt;/strong&gt;&lt;/div&gt;&lt;div align="justify"&gt;&lt;/div&gt;The standard way of quantifying the similarity between two documents is to compute the &lt;strong&gt;&lt;em&gt;&lt;a href="http://en.wikipedia.org/wiki/Cosine_similarity"&gt;cosine similarity&lt;/a&gt;&lt;/em&gt;&lt;/strong&gt; of their vector representations:&lt;br /&gt;&lt;div style="TEXT-ALIGN: justify"&gt;&lt;img style="TEXT-ALIGN: center; MARGIN: 0px auto 10px; WIDTH: 255px; DISPLAY: block; HEIGHT: 67px" id="BLOGGER_PHOTO_ID_5568358869059774978" alt="" src="http://4.bp.blogspot.com/_tOOi3R89e74/TUbI45Q0KgI/AAAAAAAAAIw/Vnnt4qR8xDw/s320/similarity.png" border="0" /&gt;where the numerator represents the &lt;a href="http://en.wikipedia.org/wiki/Dot_product"&gt;dot product &lt;/a&gt;of the vectors, while the denominator is the product of their&lt;em&gt; &lt;strong&gt;&lt;a href="http://en.wikipedia.org/wiki/Euclidean_distance"&gt;Euclidean lengths&lt;/a&gt;&lt;/strong&gt;&lt;/em&gt;. Let V(d) denote the document vector for &lt;em&gt;d&lt;/em&gt;, with &lt;em&gt;M&lt;/em&gt; components. 
The Euclidean length of d is defined to be:&lt;img style="TEXT-ALIGN: center; MARGIN: 0px auto 10px; WIDTH: 88px; DISPLAY: block; HEIGHT: 76px" id="BLOGGER_PHOTO_ID_5568365008796035682" alt="" src="http://2.bp.blogspot.com/_tOOi3R89e74/TUbOeRkAqmI/AAAAAAAAAJI/FBPxJ15eIY0/s320/euclideanlength.png" border="0" /&gt;The effect of the denominator of the similarity equation is thus to &lt;em&gt;length-normalize&lt;/em&gt; the vectors V(d1) and V(d2) to unit vectors:&lt;img style="TEXT-ALIGN: center; MARGIN: 0px auto 10px; WIDTH: 320px; DISPLAY: block; HEIGHT: 81px" id="BLOGGER_PHOTO_ID_5568363803546185970" alt="" src="http://4.bp.blogspot.com/_tOOi3R89e74/TUbNYHqDcPI/AAAAAAAAAJA/UU4nNcdeINg/s320/unitvector.png" border="0" /&gt;So I can then rewrite it as:&lt;img style="TEXT-ALIGN: center; MARGIN: 0px auto 10px; WIDTH: 249px; DISPLAY: block; HEIGHT: 38px" id="BLOGGER_PHOTO_ID_5568362249797452722" alt="" src="http://2.bp.blogspot.com/_tOOi3R89e74/TUbL9rffQ7I/AAAAAAAAAI4/FEobT5dCUlg/s320/similarity2.png" border="0" /&gt;Thus, it can be viewed as the dot product of the normalized versions of the two document vectors. 
This measure is the cosine of the angle between the two vectors in the following figure.&lt;/div&gt;&lt;div style="TEXT-ALIGN: justify"&gt;&lt;/div&gt;&lt;div style="TEXT-ALIGN: justify"&gt;&lt;/div&gt;&lt;div style="TEXT-ALIGN: justify"&gt;&lt;/div&gt;&lt;div style="TEXT-ALIGN: justify"&gt;&lt;/div&gt;&lt;div style="TEXT-ALIGN: justify"&gt;&lt;/div&gt;&lt;div style="TEXT-ALIGN: justify"&gt;&lt;/div&gt;&lt;div style="TEXT-ALIGN: justify"&gt;&lt;/div&gt;&lt;div style="TEXT-ALIGN: justify"&gt;&lt;/div&gt;&lt;div style="TEXT-ALIGN: justify"&gt;&lt;/div&gt;&lt;div style="TEXT-ALIGN: justify"&gt;&lt;/div&gt;&lt;div style="TEXT-ALIGN: justify"&gt;&lt;/div&gt;&lt;div style="TEXT-ALIGN: justify"&gt;&lt;/div&gt;&lt;div style="TEXT-ALIGN: justify"&gt;&lt;/div&gt;&lt;div style="TEXT-ALIGN: justify"&gt;&lt;img style="TEXT-ALIGN: center; MARGIN: 0px auto 10px; WIDTH: 292px; DISPLAY: block; HEIGHT: 280px" id="BLOGGER_PHOTO_ID_5568618593225395058" alt="" src="http://2.bp.blogspot.com/_tOOi3R89e74/TUe1G1LVy3I/AAAAAAAAAJY/qmb9A2DIYTQ/s320/vector_space_example.png" border="0" /&gt;&lt;/div&gt;&lt;div style="TEXT-ALIGN: justify"&gt; &lt;/div&gt;&lt;div style="TEXT-ALIGN: justify"&gt;This similarity measure is used to retrieval similar documents to a particular document in a collection. For example, given a document d, a user wants to finding the documents most similar to d. The search systems will retrieve the documents (di) with highest dot products. They could do this by computing the dot product (&lt;em&gt;v(d)·v(di)&lt;/em&gt;) between v(d) and each v(d1), ..., v(dn). 
Finally the user will obtain a ranked list based on this measure.&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-5190360837502703806?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/5190360837502703806/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/01/vector-space-model-for-scoring.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/5190360837502703806'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/5190360837502703806'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/01/vector-space-model-for-scoring.html' title='The vector space model for scoring'/><author><name>Monica</name><uri>http://www.blogger.com/profile/04558100039934227329</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/_tOOi3R89e74/TUeyueig7ZI/AAAAAAAAAJQ/QHL-VLEWook/s72-c/vector_space.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-697342171267641260</id><published>2011-01-28T00:38:00.019+01:00</published><updated>2011-02-02T17:19:44.425+01:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='evaluation'/><category scheme='http://www.blogger.com/atom/ns#' term='Eli'/><category scheme='http://www.blogger.com/atom/ns#' term='classification'/><category scheme='http://www.blogger.com/atom/ns#' term='identification'/><title 
type='text'>Automatic video shot type identification by Wang et al</title><content type='html'>&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span" style="font-family: arial; "&gt;My name is Eli Carcel and this will be my first contribution to this blog. As I am now starting my bachelor thesis on keyframe-based shot identification I have been s&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: arial; "&gt;tudying the state of the art. I found a related paper called "&lt;i&gt;&lt;a href="http://www.springerlink.com/content/e25146466j4166rl/"&gt;Automatic&lt;/a&gt;&lt;/i&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: arial; "&gt;&lt;i&gt;&lt;a href="http://www.springerlink.com/content/e25146466j4166rl/"&gt; Video Shot Size Annotation Scheme&lt;/a&gt; by Wang et al (2006)"&lt;/i&gt; which I will briefly describe.&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;b&gt;&lt;br /&gt;&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;The aim of the project is recognizing an image shot type pattern, assuming there are three possible categories: Close-up, Medium shot and Long shot. 
The project steps include: feature extraction, training, classification and decision.&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: center;"&gt;&lt;span class="Apple-style-span"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;img src="http://2.bp.blogspot.com/_IYWQhgQGEH4/TUIEQuXLgOI/AAAAAAAAAAU/Tyu01JaigB0/s400/scheme.jpg" style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 400px; height: 101px;" border="0" alt="" id="BLOGGER_PHOTO_ID_5567016774753026274" /&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt; &lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: arial; "&gt; &lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;Process&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;&lt;br /&gt;&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;The first step is feature extraction where two &lt;a href="http://en.wikipedia.org/wiki/Feature_vector"&gt;feature vectors&lt;/a&gt; are created, one corresponding to low level features such as color, edge and texture and the other one to mid level features involving regions and its features.&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: center;"&gt;&lt;b&gt;&lt;br /&gt;&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;a href="http://en.wikipedia.org/wiki/Semi-supervised_learning"&gt;Semi-supervised learning&lt;/a&gt; is needed in order to obtain valid data to train the system. &lt;a href="http://en.wikipedia.org/wiki/Co-training"&gt;Co-training&lt;/a&gt; can help boosting the training process by offering semi-supervised annotation. 
&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt; &lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;b&gt;&lt;br /&gt;&lt;/b&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;Each vector of features is sent to a different &lt;a href="http://en.wikipedia.org/wiki/Classifier_(mathematics)"&gt;classifier&lt;/a&gt; (low level and mid level) to decide which shot size best suits the image among the three existing classes. &lt;/span&gt;A combination of both classifiers’ results is used in order to improve efficiency. &lt;a href="http://wekadocs.com/node/15"&gt;Cost-sensitive&lt;/a&gt; decisions for misclassifications are calculated to provide a better cost minimization based classifier.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;b&gt;&lt;br /&gt;&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;b&gt;&lt;br /&gt;&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;b&gt;Experimental results&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;b&gt;&lt;br /&gt;&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;Tests are done using 20 hours of video extracted from home video camcorders and then divided into 1000 shots according to timestamps. As in this stage some shots may include images with several shot sizes, each shot is divided into 4000 sub-shots, assuming now identical shot size within a sub-shot. 
&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;All results are the average of 10 runs, where a run is an iteration done by using 20% of the samples randomly selected as training data and the other 80% to test the system.&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt; &lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt; &lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;&lt;span class="Apple-style-span" style="font-family: Georgia, serif; "&gt;&lt;img src="http://4.bp.blogspot.com/_IYWQhgQGEH4/TUIERU4awGI/AAAAAAAAAAk/8a9NSKZ_-DQ/s400/results.jpg" border="0" alt="" id="BLOGGER_PHOTO_ID_5567016785092984930" style="display: block; margin-top: 0px; margin-right: auto; margin-bottom: 10px; margin-left: auto; text-align: center; cursor: pointer; width: 400px; height: 301px; " /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span class="Apple-style-span"&gt;The paper shows how low level descriptors work better in shot type identification, though it &lt;/span&gt;can be pursued a better performance using a combined low level and mid level decision.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-697342171267641260?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' 
href='http://bitsearch.blogspot.com/feeds/697342171267641260/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/01/automatic-video-shot-size-annotation.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/697342171267641260'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/697342171267641260'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/01/automatic-video-shot-size-annotation.html' title='Automatic video shot type identification by Wang et al'/><author><name>Eli Carcel</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://2.bp.blogspot.com/_IYWQhgQGEH4/TUIEQuXLgOI/AAAAAAAAAAU/Tyu01JaigB0/s72-c/scheme.jpg' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-7685433173737455272</id><published>2011-01-18T13:49:00.015+01:00</published><updated>2011-06-30T15:15:40.863+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='RAMON'/><category scheme='http://www.blogger.com/atom/ns#' term='teaching'/><category scheme='http://www.blogger.com/atom/ns#' term='xavi'/><category scheme='http://www.blogger.com/atom/ns#' term='thesis'/><category scheme='http://www.blogger.com/atom/ns#' term='Cristina'/><title type='text'>Awards for Ramon and Cristina's thesis</title><content type='html'>Year 2010 was excellent in terms of the quantity and quality of &lt;a href="http://gps-tsc.upc.es/imatge/_Xgiro/teaching/thesis/index.html"&gt;thesis&lt;/a&gt; I supervised. Five academic works were defended, four of them qualified with an A with honors. 
Apart from being very satisfied for the good job of all students, I am especially satisfied for the public recognition that two of them have received.&lt;br /&gt;&lt;br /&gt;Last October &lt;a href="http://bitsearch.blogspot.com/search/label/RAMON"&gt;Ramon Salla Rovira&lt;/a&gt; received&lt;a href="http://4.bp.blogspot.com/_pLbB-QouQ9g/TTWWTA2MgCI/AAAAAAAAAes/hNXO0cUllGM/s1600/CCMA.jpg"&gt;&lt;img style="float: right; margin: 0pt 0pt 10px 10px; cursor: pointer; width: 200px; height: 46px;" src="http://4.bp.blogspot.com/_pLbB-QouQ9g/TTWWTA2MgCI/AAAAAAAAAes/hNXO0cUllGM/s200/CCMA.jpg" alt="" id="BLOGGER_PHOTO_ID_5563518168075108386" border="0" /&gt;&lt;/a&gt; one of the &lt;a href="http://www.etsetb.upc.edu/es/mason-share/notif/1675.html"&gt;awards&lt;/a&gt; given by companies to the best thesis in the &lt;a href="http://www.etsetb.upc.edu/"&gt;Telecom BCN (ETSETB)&lt;/a&gt;. His &lt;a href="http://hdl.handle.net/2099.1/8766"&gt;work&lt;/a&gt; on the development of a web client for multimodal video retrieval was considered the best thesis in the area of Telematics by &lt;a href="http://www.accenture.com/"&gt;Acc&lt;/a&gt;&lt;a href="http://www.accenture.com/"&gt;entu&lt;/a&gt;&lt;a href="http://www.accenture.com/"&gt;re&lt;/a&gt;. Ramon developed his thesis while working at &lt;a href="http://www.ccma.cat/inici/inici_eng.htm"&gt;CCMA&lt;/a&gt;, so it was a joint work from the university and industry. 
In this case the supervision was shared between &lt;a href="http://es.linkedin.com/in/xaviervives"&gt;Xavier Vives&lt;/a&gt; and me.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://1.bp.blogspot.com/_pLbB-QouQ9g/TTWVjpff3II/AAAAAAAAAek/2QxQIkpIPLA/s1600/activa_multimedia.gif"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 0pt; cursor: pointer; width: 196px; height: 94px;" src="http://1.bp.blogspot.com/_pLbB-QouQ9g/TTWVjpff3II/AAAAAAAAAek/2QxQIkpIPLA/s400/activa_multimedia.gif" alt="" id="BLOGGER_PHOTO_ID_5563517354352041090" border="0" /&gt;&lt;/a&gt;&lt;a href="http://bitsearch.blogspot.com/search/label/Cristina"&gt;Cristina Ruiz Sancho&lt;/a&gt; also received many compliments for her work during her studies and her thesis. In the graduation day at the &lt;a href="http://www.eet.upc.edu/"&gt;Engineering School of Terrassa (EET)&lt;/a&gt; she was recognized as the best student among all the five undergraduate programs in the school and her thesis was selected to represent the school in the awards organized by the &lt;a href="http://www.coitt.es/"&gt;Spanish Association of Technical Telecommunication Engineers&lt;/a&gt;. Her &lt;a href="http://gps-tsc.upc.es/imatge/_Xgiro/teaching/thesis/2009-2010/CristinaRuiz/memoria.pdf"&gt;work&lt;/a&gt; on bringing the &lt;a href="http://twitter.com/"&gt;Twitter&lt;/a&gt; service to social TV received the &lt;a href="http://www.coitt.es/index.php?page=noticias_coitt_reg&amp;amp;ireg=0&amp;amp;icod=263"&gt;second prize&lt;/a&gt; in this prestigious competition. 
&lt;a href="http://www.ccma.cat/"&gt;CCMA&lt;/a&gt; can be very proud of their technical divisions because Cristina also developed her thesis there under the joint supervision of &lt;a href="http://es.linkedin.com/in/cucucat"&gt;Eduard Cucurella&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://1.bp.blogspot.com/_pLbB-QouQ9g/TTWUo39alpI/AAAAAAAAAeU/k-L_VrYro24/s1600/IMG_1484.JPG"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 400px; height: 299px;" src="http://1.bp.blogspot.com/_pLbB-QouQ9g/TTWUo39alpI/AAAAAAAAAeU/k-L_VrYro24/s400/IMG_1484.JPG" alt="" id="BLOGGER_PHOTO_ID_5563516344623339154" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;Ramon and Cristina did an excellent work during their thesis, but their success can only be understood as the tip of a much larger iceberg. Their enthusiasm, talent and ambition was successfully supported by their colleagues at CCMA as well as the teaching and research team at the UPC. For this reason I believe that more excellent thesis are to come soon. &lt;a href="http://bitsearch.blogspot.com/search/label/Monica"&gt;Mónica&lt;/a&gt; will defend hers next Monday, and &lt;a href="http://bitsearch.blogspot.com/search/label/Aida"&gt;Aida&lt;/a&gt; and &lt;a href="http://bitsearch.blogspot.com/search/label/Neus"&gt;Neus&lt;/a&gt; are in the final stages of theirs. 
I wish the example given by Ramon and Cristina will encourage them during these last weeks of hard work.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-7685433173737455272?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/7685433173737455272/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/01/ramon-salla-and-cristina-ruiz-are.html#comment-form' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/7685433173737455272'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/7685433173737455272'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/01/ramon-salla-and-cristina-ruiz-are.html' title='Awards for Ramon and Cristina&apos;s thesis'/><author><name>Xavi Giró-i-Nieto</name><uri>https://profiles.google.com/110271653716466055491</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='//lh5.googleusercontent.com/--EaBZ1Bzzsg/AAAAAAAAAAI/AAAAAAAAAwU/4J6xIhZ3Xyo/s512-c/photo.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/_pLbB-QouQ9g/TTWWTA2MgCI/AAAAAAAAAes/hNXO0cUllGM/s72-c/CCMA.jpg' height='72' width='72'/><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-3424438961136052997</id><published>2011-01-14T21:11:00.009+01:00</published><updated>2011-04-28T10:44:25.718+02:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Monica'/><category scheme='http://www.blogger.com/atom/ns#' term='text'/><title type='text'>Ranking text documents based on 
TF-IDF</title><content type='html'>I introduced the concept of TF-IDF &lt;span style="font-style: italic;"&gt;(term frequency-inverse document frequency)&lt;/span&gt; in one of my first &lt;a href="http://bitsearch.blogspot.com/2010/06/tf-idf.html"&gt;posts&lt;/a&gt; but, in this post, I will examine the structure of it for a set of documents.&lt;br /&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;The TF-IDF can be seen as a textual descriptor for text documents. And this concept has been extended to the analysis of images, as Laura explained in this &lt;a href="http://bitsearch.blogspot.com/2010/08/summary-of-video-google-paper-by-sivic.html"&gt;post&lt;/a&gt;.&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;div style="text-align: justify;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;span style="font-weight: bold; color: rgb(0, 204, 204);"&gt;Term frequency&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;A &lt;span style="font-weight: bold;"&gt;weight &lt;/span&gt;is assigned to each term in a document according to the number of occurrences of the term in the document. This weight is &lt;span style="line-height: 115%;font-family:&amp;quot;;font-size:11pt;"  &gt;&lt;!--[endif]--&gt;&lt;/span&gt;&lt;!--[endif]--&gt;&lt;span style="" lang="EN-US"&gt;&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;  referred to as &lt;span style="font-weight: bold;"&gt;term frequency&lt;/span&gt;. 
The simplest approach is to assign the weight to be equal to the number of occurrences of term &lt;span style="font-style: italic;"&gt;t &lt;/span&gt;in document&lt;span style="font-style: italic;"&gt; d&lt;/span&gt;.&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/_tOOi3R89e74/TTDEkav0R1I/AAAAAAAAAIY/X7dRo1T6DeI/s1600/basic_tf.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 209px; height: 47px;" src="http://4.bp.blogspot.com/_tOOi3R89e74/TTDEkav0R1I/AAAAAAAAAIY/X7dRo1T6DeI/s320/basic_tf.png" alt="" id="BLOGGER_PHOTO_ID_5562161669736777554" border="0" /&gt;&lt;/a&gt;But some other common approaches are:&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/_tOOi3R89e74/TTDEkCwOtEI/AAAAAAAAAIQ/Fn-VYryhOO8/s1600/common_tf.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 223px; height: 149px;" src="http://1.bp.blogspot.com/_tOOi3R89e74/TTDEkCwOtEI/AAAAAAAAAIQ/Fn-VYryhOO8/s320/common_tf.png" alt="" id="BLOGGER_PHOTO_ID_5562161663296058434" border="0" /&gt;&lt;/a&gt;&lt;span style="font-weight: bold; color: rgb(0, 204, 204);"&gt;Inverse document frequency&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;Term frequency suffers from a critical problem: all terms are considered equally important when it comes to  assessing relevance on a query. In fact, certain terms have little power in characterizing the document. 
For instance, the &lt;span style="font-style: italic;"&gt;stopwords&lt;/span&gt; that are meaningless terms such as articles, pronouns, prepositions, etc.&lt;br /&gt;&lt;br /&gt;So &lt;span style="font-weight: bold;"&gt;document frequency&lt;/span&gt; &lt;span style="font-style: italic;"&gt;(df)&lt;/span&gt;, defined as the number of documents in the collection that contain a term &lt;span style="font-style: italic;"&gt;t&lt;/span&gt;, is introduced for attenuating the effect of terms that occur too often in the collection to be meaningful for relevance determination. The solution for this problem is attenuating the tf with the &lt;span style="font-weight: bold;"&gt;inverse document frequency (idf)&lt;/span&gt;&lt;span&gt;, which is defined as&lt;/span&gt;:&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/_tOOi3R89e74/TTDCfbYzIXI/AAAAAAAAAH4/MBW7XzPCA1w/s1600/idf.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 149px; height: 75px;" src="http://3.bp.blogspot.com/_tOOi3R89e74/TTDCfbYzIXI/AAAAAAAAAH4/MBW7XzPCA1w/s320/idf.png" alt="" id="BLOGGER_PHOTO_ID_5562159384986067314" border="0" /&gt;&lt;/a&gt;where &lt;span style="font-style: italic;"&gt;N &lt;/span&gt;is the number of documents in a collection. Thus the idf of a rare term is high, whereas the idf of a frequent term is likely to be low.&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold; color: rgb(0, 204, 204);"&gt;TF-IDF weighting&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;TF-IDF&lt;/span&gt; combines the definitions of term frequency and inverse document frequency to produce a composite weight for each term in each document. 
The TF-IDF weighting scheme assigns to term t a weight in document d given by:&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/_tOOi3R89e74/TTDCf1m4XyI/AAAAAAAAAII/tCJXG2XOQYU/s1600/tfidf.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 252px; height: 48px;" src="http://2.bp.blogspot.com/_tOOi3R89e74/TTDCf1m4XyI/AAAAAAAAAII/tCJXG2XOQYU/s320/tfidf.png" alt="" id="BLOGGER_PHOTO_ID_5562159392024452898" border="0" /&gt;&lt;/a&gt;&lt;ul&gt;&lt;li&gt;It will be highest when t occurs many times within a small number of documents.&lt;/li&gt;&lt;li&gt;It will be lower when the term occurs fewer times in a document, or occurs in many documents.&lt;/li&gt;&lt;li&gt;It will be lowest when the term occurs in virtually all documents.&lt;/li&gt;&lt;/ul&gt;At this point, each document can be seen as a vector with one component corresponding to each term in the dictionary. Each value in this vector corresponds to the computed tf-idf value for the term in the document. 
For dictionary terms that do not occur in a document, this weight is zero.&lt;br /&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: center;"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/_tOOi3R89e74/TTCuTLO2uzI/AAAAAAAAAHg/szkkRvhPttc/s1600/1.png"&gt;&lt;img style="cursor: pointer; width: 320px; height: 78px;" src="http://1.bp.blogspot.com/_tOOi3R89e74/TTCuTLO2uzI/AAAAAAAAAHg/szkkRvhPttc/s320/1.png" alt="" id="BLOGGER_PHOTO_ID_5562137184258407218" border="0" /&gt;&lt;/a&gt;&lt;div style="text-align: justify;"&gt;&lt;span style="font-weight: bold; color: rgb(51, 204, 255);"&gt;Possible applications&lt;/span&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/_tOOi3R89e74/TTRK0m2rKJI/AAAAAAAAAIo/VVbTqvjjbQQ/s1600/800px-Web_2_0_Map_svg.png"&gt;&lt;img style="float: right; margin: 0pt 0pt 10px 10px; cursor: pointer; width: 174px; height: 132px;" src="http://4.bp.blogspot.com/_tOOi3R89e74/TTRK0m2rKJI/AAAAAAAAAIo/VVbTqvjjbQQ/s320/800px-Web_2_0_Map_svg.png" alt="" id="BLOGGER_PHOTO_ID_5563153707352926354" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;div style="text-align: justify;"&gt;A &lt;a href="http://en.wikipedia.org/wiki/Tag_cloud"&gt;&lt;span style="font-weight: bold;"&gt;tag cloud &lt;/span&gt;&lt;/a&gt;is a visual depiction of the term content of a site, typically used to describe the content of web sites. Tags are usually single words and are normally listed alphabetically, and the importance of each tag is shown with a different font size or color. 
The concept of TF-IDF can be used to determine the weight of each tag.&lt;br /&gt;&lt;/div&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/_tOOi3R89e74/TTDCfYecbwI/AAAAAAAAAIA/8iwI23qguGI/s1600/score.png"&gt;&lt;br /&gt;&lt;/a&gt;&lt;/div&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-3424438961136052997?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/3424438961136052997/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/01/ranking-documents-based-on-tf-idf.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/3424438961136052997'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/3424438961136052997'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/01/ranking-documents-based-on-tf-idf.html' title='Ranking text documents based on TF-IDF'/><author><name>Monica</name><uri>http://www.blogger.com/profile/04558100039934227329</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/_tOOi3R89e74/TTDEkav0R1I/AAAAAAAAAIY/X7dRo1T6DeI/s72-c/basic_tf.png' height='72' 
width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-3863299235648550014</id><published>2011-01-14T19:26:00.019+01:00</published><updated>2011-01-21T13:32:10.646+01:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Jaume'/><category scheme='http://www.blogger.com/atom/ns#' term='region'/><category scheme='http://www.blogger.com/atom/ns#' term='object'/><category scheme='http://www.blogger.com/atom/ns#' term='Architecture'/><category scheme='http://www.blogger.com/atom/ns#' term='detection'/><category scheme='http://www.blogger.com/atom/ns#' term='xavi'/><title type='text'>Temporal stabilization of bounding boxes</title><content type='html'>In a recent &lt;a href="http://bitsearch.blogspot.com/2010/12/region-local-annotations-in-use-case.html"&gt;post&lt;/a&gt;, we presented two videos showing the results of a region-based object detection. We tried to detect two different types of objects, a cap and the poker maths in a test sequence produced by our industry partners. We highlighted, using a bounding box, each detected region of interest. Finally, we generated a &lt;a href="http://www.youtube.com/watch?v=rSdcD8201Rk&amp;amp;feature=player_embedded"&gt;video&lt;/a&gt;, combining all images with their generated bounding boxes. &lt;br /&gt;&lt;br /&gt;&lt;iframe width="425" height="344" src="http://www.youtube.com/embed/wF_TQgHA8JA?fs=1" frameborder="0" allowFullScreen=""&gt;&lt;/iframe&gt;&lt;br /&gt;&lt;br /&gt;&lt;iframe width="425" height="344" src="http://www.youtube.com/embed/rSdcD8201Rk?fs=1" frameborder="0" allowFullScreen=""&gt;&lt;/iframe&gt;&lt;br /&gt;&lt;br /&gt;After analysing these videos, we realized that there was an upsetting flickering between the bounding boxes of consecutive images. This effect was generated because every single detection was performed individually for every image, without considering the other detections in the sequence. 
We decided to exploit the temporal dimension to fix this issue.&lt;br /&gt;&lt;br /&gt;We used a temporal stabilizer developed by David Varas and Pedro Espinosa. This tool detects outliers and performs a spatial low level filtering of the bounding box vertices.&lt;br /&gt;&lt;br /&gt;The next diagram shows the design procedure. The input sequence of XML files contains the object detections as a set of regions in the BPT which, at the same time, finally refer to a file containing the partition labels of every pixel in the input image. I developed a simple Java application that translated these region IDs to the bounding box coordinates surrounding the images. The sequence of box coordinates was stabilized to finally generate the images with an overlaid rectangle around every detected object.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://4.bp.blogspot.com/_LMiS0nb36cg/TTbQiq45qCI/AAAAAAAAAE4/O8m7bLxi9V4/s1600/Sin%2Bt%25C3%25ADtulo%2B1.png"&gt;&lt;img style="margin: 0px auto 10px; display: block; text-align: center; cursor: pointer; width: 400px; height: 300px;" src="http://4.bp.blogspot.com/_LMiS0nb36cg/TTbQiq45qCI/AAAAAAAAAE4/O8m7bLxi9V4/s400/Sin%2Bt%25C3%25ADtulo%2B1.png" alt="" id="BLOGGER_PHOTO_ID_5563863683709904930" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;The resulting videos are shown below:&lt;br /&gt;&lt;br /&gt;&lt;iframe width="425" height="344" src="http://www.youtube.com/embed/gG3gKU3IKfE?fs=1" frameborder="0" allowFullScreen=""&gt;&lt;/iframe&gt;&lt;br /&gt;&lt;br /&gt;&lt;iframe src="http://www.youtube.com/embed/sEmL-w0aZvk?fs=1" allowfullscreen="" frameborder="0" height="344" width="425"&gt;&lt;/iframe&gt;&lt;br /&gt;&lt;br /&gt;Notice the multi-view nature of the detections, that have been learnt with no additional interaction from the user side. 
The object detector has processed all provided region-based annotations to automatically learn the number of views and build a detector for each of them.&lt;br /&gt;&lt;br /&gt;In conclusion, comparing both videos, the one with the flickering problem and the other with the problem fixed, a better visualization and more accurate results can be observed in the latter.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/3250503894549245556-3863299235648550014?l=bitsearch.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://bitsearch.blogspot.com/feeds/3863299235648550014/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://bitsearch.blogspot.com/2011/01/temporal-stabilizer-in-bounding-boxes.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/3863299235648550014'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/3250503894549245556/posts/default/3863299235648550014'/><link rel='alternate' type='text/html' href='http://bitsearch.blogspot.com/2011/01/temporal-stabilizer-in-bounding-boxes.html' title='Temporal stabilization of bounding boxes'/><author><name>Jaume</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://img.youtube.com/vi/wF_TQgHA8JA/default.jpg' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-3250503894549245556.post-5848762417454606948</id><published>2011-01-02T13:40:00.011+01:00</published><updated>2011-01-02T15:49:03.619+01:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='video'/><category 
scheme='http://www.blogger.com/atom/ns#' term='evaluation'/><category scheme='http://www.blogger.com/atom/ns#' term='Monica'/><category scheme='http://www.blogger.com/atom/ns#' term='retrieval'/><title type='text'>Relevance and Diversity Evaluation of the Ranking Algorithm</title><content type='html'>&lt;div style="text-align: justify;"&gt;&lt;div style="text-align: justify;"&gt;These days I'm finishing my &lt;span style="color: rgb(0, 0, 0);"&gt;Bachelor Thesis. In this post I will expose the results we obtained from de&lt;a href="http://bitsearch.blogspot.com/2010/07/scheme-of-reranking-results-for-video.html"&gt; reranker algorithm&lt;/a&gt;.&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="font-weight: bold; color: rgb(51, 204, 255);"&gt;Evaluation&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;The reranker algorithm for filtering the SGs at the intra and inter-asset level were tested on a representative dataset from CCMA, the public national broadcaster in Catalonia. The obtained ranked lists were evaluated in terms of keyframe relevance and asset diversity for different query topics and the generated resutls compared.&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="font-weight: bold; color: rgb(51, 204, 255);"&gt;Experiments&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;We selected a set of text queries form a list of controlled terms used by the documentalists that manually annotated the video assets. For every query, all retrieved keyframes were individually annotated as the relevant or not-relevant for the query. The criterion to establish the relevance was to consider if the keyframe may be tagged with textual query by an annotator who would only access keyframe. 
The text queries were:&lt;/span&gt;&lt;br /&gt;&lt;/div&gt;&lt;br /&gt;&lt;div style="text-align: justify;"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;a onblur="try  {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/_tOOi3R89e74/TSB-KTdBV2I/AAAAAAAAAGY/QIFtWUaP2_c/s1600/1.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 237px; height: 112px;" src="http://1.bp.blogspot.com/_tOOi3R89e74/TSB-KTdBV2I/AAAAAAAAAGY/QIFtWUaP2_c/s320/1.png" alt="" id="BLOGGER_PHOTO_ID_5557580655660980066" border="0" /&gt;&lt;/a&gt;&lt;/span&gt;&lt;span style="font-weight: bold; color: rgb(51, 204, 255);"&gt;Metrics&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;These experiments pursues the generation of results that accomplish two basic properties: relevant keyframes and diversity of assets. Two different metrics were used to evaluate these two qualities: the average precision and the average asset recall.&lt;/span&gt;&lt;br /&gt;&lt;/div&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;a onblur="try  {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/_tOOi3R89e74/TSB-eAaIW1I/AAAAAAAAAGg/p_miLQ2wR4w/s1600/2.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 320px; height: 54px;" src="http://3.bp.blogspot.com/_tOOi3R89e74/TSB-eAaIW1I/AAAAAAAAAGg/p_miLQ2wR4w/s320/2.png" alt="" id="BLOGGER_PHOTO_ID_5557580994145966930" border="0" /&gt;&lt;/a&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;Here, I present a new metric that is specifically designed to evaluate the reranker algorithm in the case of the corpus of CCMA.&lt;/span&gt;&lt;br /&gt;&lt;br 
/&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="font-weight: bold;"&gt;Average Asset-Diversity&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;The goal was to desgn a metric that behaved similarly to the AP, that is, a normalized value whose best output were the unit and that would introduce a larger penalization to the non-diverse results when they occur amount the earliest positions than when they occur in the latest ones. In a first approach, the Diversity at k would measure the variety of the results as:&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="font-weight: bold; color: rgb(51, 204, 255);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;a onblur="try  {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/_tOOi3R89e74/TSCEJtd8_tI/AAAAAAAAAGo/vozV9E2JKwk/s1600/3.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 320px; height: 70px;" src="http://1.bp.blogspot.com/_tOOi3R89e74/TSCEJtd8_tI/AAAAAAAAAGo/vozV9E2JKwk/s320/3.png" 
alt="" id="BLOGGER_PHOTO_ID_5557587242534108882" border="0" /&gt;&lt;/a&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;where d(k) corresponds to the amount of different video assets contained in the positions 1...k of the ranked list. Notice that this metric is only defined for k greater or equals 2 as the diversity can only be evaluated on asset of multiple items.&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;Combining the concept of AP with the diversity measure introduced, we proposed the Average A-Diversity (AD) as the second metric to evaluate any system where the diversity is among its specification. The next expression combines the Diversity at the k first positions, starting on 2 and going on until m, where m represents the total amount of different assets that are relevant to the query.&lt;/span&gt;&lt;br /&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 
0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="font-weight: bold; color: rgb(51, 204, 255);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;a onblur="try  {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/_tOOi3R89e74/TSCEJ4ItEUI/AAAAAAAAAGw/p4bjroG9GII/s1600/4.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 320px; height: 62px;" src="http://3.bp.blogspot.com/_tOOi3R89e74/TSCEJ4ItEUI/AAAAAAAAAGw/p4bjroG9GII/s320/4.png" alt="" id="BLOGGER_PHOTO_ID_5557587245397774658" border="0" /&gt;&lt;/a&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;Both AP and AD were calculated for every text query and their values averaged among all topics to obtain the Mean Average Precision (MAP) and Mean Average Asset-Diversity (MAD).&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="font-weight: bold; color: rgb(51, 204, 255);"&gt;Results&lt;/span&gt;&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;The previous solutions for the filtering of the SGs have been evaluated by 
considering the four presented options for ranking: non filter (only random walk), intra-asset filtering, inter-asset filtering and both types of filtering. An additional baseline case was considered by using the results list obtained after the next search, with no further processing.&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;The MAP and MAD represented in the figures 1 and 2 clearly show the relevance increase introduced by the random walk. The filtering of the SG has little impact on the MAP, with a slight decrease when the inter filtering is introduced. The decrease is reasonable as any filtering operation is an action against the principles of relevance estimation in the SG: the more relevant are the more connected and, by removing connections, there is a loss in the data used to estimate relevance. In compensation, figure 2 proves that the filtering strategies increase the diversity of assets in the results. The removal of only inter-asset connections significantly decreases the MAP as it isolates groups of relevant keyframes whose score decreases in favor of other keyframes from their same asset. 
Nevertheless, the best results are obtained when the inter-asset filtering is combined with the intra-asset solution.&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;a onblur="try  {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/_tOOi3R89e74/TSCFfmTI4hI/AAAAAAAAAHQ/caUEStqvksE/s1600/_1.bmp"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 320px; height: 256px;" src="http://1.bp.blogspot.com/_tOOi3R89e74/TSCFfmTI4hI/AAAAAAAAAHQ/caUEStqvksE/s320/_1.bmp" alt="" id="BLOGGER_PHOTO_ID_5557588718078452242" border="0" /&gt;&lt;/a&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="text-align: center;"&gt;&lt;div&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;Figure 1&lt;br /&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 
0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;a onblur="try  {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/_tOOi3R89e74/TSCFfbRGBBI/AAAAAAAAAHI/Q7MCO9QGPmg/s1600/_2.bmp"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 320px; height: 256px;" src="http://2.bp.blogspot.com/_tOOi3R89e74/TSCFfbRGBBI/AAAAAAAAAHI/Q7MCO9QGPmg/s320/_2.bmp" alt="" id="BLOGGER_PHOTO_ID_5557588715117085714" border="0" /&gt;&lt;/a&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;Figure 2&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: justify;"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;The resutls per query concept are presented in figures 3 and 4. The first conclusion from these figures is that the domain of application of the filtering techniques has and impact on the obtained results. While the general conclusion drawn from the MA&lt;/span&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;P and MAD analysis apply, not all query concepts present the exact same behaviour. 
For example, the intra+inter filtering does not present the best AD in the "Formula1" and "Accident" domains, although in general its behaviour is the most regular in terms of diversity.&lt;/span&gt;&lt;br /&gt;&lt;/div&gt;&lt;br /&gt;&lt;div style="text-align: center;"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;a onblur="try  {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/_tOOi3R89e74/TSCFfD2CzNI/AAAAAAAAAHA/D-PmYGkyEeo/s1600/_3.bmp"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 320px; height: 256px;" src="http://1.bp.blogspot.com/_tOOi3R89e74/TSCFfD2CzNI/AAAAAAAAAHA/D-PmYGkyEeo/s320/_3.bmp" alt="" id="BLOGGER_PHOTO_ID_5557588708829613266" border="0" /&gt;&lt;/a&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;Figure 3&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="text-align: center;"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="font-weight: bold; color: rgb(51, 204, 255);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span 
style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&lt;span style="color: rgb(0, 0, 0);"&gt;&
