journal articles
Yanxia Zhang; Ming Ki Chong; Jörg Müller; Andreas Bulling; Hans Gellersen: Eye Tracking for Public Displays in the Wild. Personal and Ubiquitous Computing, 19(5), pp. 967-981, 2015. DOI: 10.1007/s00779-015-0866-8. PDF: https://perceptual.mpi-inf.mpg.de/files/2015/07/Zhang15_UC.pdf
Abstract: In public display contexts, interactions are spontaneous and have to work without preparation. We propose gaze as a modality for such contexts, as gaze is always at the ready, and a natural indicator of the user’s interest. We present GazeHorizon, a system that demonstrates spontaneous gaze interaction, enabling users to walk up to a display and navigate content using their eyes only. GazeHorizon is extemporaneous and optimised for instantaneous usability by any user without prior configuration, calibration or training. The system provides interactive assistance to bootstrap gaze interaction with unaware users, employs a single off-the-shelf web camera and computer vision for person-independent tracking of the horizontal gaze direction, and maps this input to rate-controlled navigation of horizontally arranged content. We have evaluated GazeHorizon through a series of field studies, culminating in a four-day deployment in a public environment during which over a hundred passers-by interacted with it, unprompted and unassisted. We realised that since eye movements are subtle, users cannot learn gaze interaction from only observing others, and as a result guidance is required.
Tobias Loetscher; Celia Chen; Sabrina Hoppe; Andreas Bulling; Sophie Wignall; Owen Churches; Nicole Thomas; Andrew Lee: Walking reduces spatial neglect. Journal of the International Neuropsychological Society, 21(S2), pp. 120-121, 2015. DOI: 10.1017/S1355617715001290. PDF: https://perceptual.mpi-inf.mpg.de/files/2016/01/Loetscher15_INS.pdf Publisher: http://journals.cambridge.org/action/displayIssue?decade=2010&jid=INS&volumeId=21&issueId=s2&iid=10065024
Abstract: Spatial neglect is a common consequence of stroke. Neglect behaviour is typically exacerbated by increased task demands. It was thus anticipated that the addition of a secondary task requiring general attention (walking) would worsen performance on tests of spatial neglect. Here, however, we report a patient in whom neglect was considerably reduced when performing a visual search task while walking.
Tobias Loetscher; Celia Chen; Sophie Wignall; Andreas Bulling; Sabrina Hoppe; Owen Churches; Nicole Thomas: A study on the natural history of scanning behaviour in patients with visual field defects after stroke. BMC Neurology, 15(1), pp. 64, 2015. DOI: 10.1186/s12883-015-0321-5. PDF: https://perceptual.mpi-inf.mpg.de/files/2015/05/loetscher_BCMN15.pdf
Abstract: A visual field defect (VFD) is a common consequence of stroke with a detrimental effect upon the survivors’ functional ability and quality of life. The identification of effective treatments for VFD is a key priority relating to life post-stroke. Understanding the natural evolution of scanning compensation over time may have important ramifications for the development of efficacious therapies. The study aims to unravel the natural history of visual scanning behaviour in patients with VFD. The assessment of scanning patterns in the acute to chronic stages of stroke will reveal who does and does not learn to compensate for vision loss. Methods/Design: Eye-tracking glasses are used to delineate eye movements in a cohort of 100 stroke patients immediately after stroke, and additionally at 6 and 12 months post-stroke. The longitudinal study will assess eye movements in static (sitting) and dynamic (walking) conditions. The primary outcome constitutes the change of lateral eye movements from the acute to chronic stages of stroke. Secondary outcomes include changes of lateral eye movements over time as a function of subgroup characteristics, such as side of VFD, stroke location, stroke severity and cognitive functioning. Discussion: The longitudinal comparison of patients who do and do not learn compensatory scanning techniques may reveal important prognostic markers of natural recovery. Importantly, it may also help to determine the most effective treatment window for visual rehabilitation.
Melodie Vidal; Andreas Bulling; Hans Gellersen: Pursuits: Spontaneous Eye-Based Interaction for Dynamic Interfaces. ACM SIGMOBILE Mobile Computing and Communications Review, 18(4), pp. 8-10, 2015. DOI: 10.1145/2721914.2721917. PDF: https://perceptual.mpi-inf.mpg.de/files/2015/01/p8-vidal.pdf
Abstract: Although gaze is an attractive modality for pervasive interaction, real-world implementation of eye-based interfaces poses significant challenges. In particular, user calibration is tedious and time consuming. Pursuits is an innovative interaction technique that enables truly spontaneous interaction with eye-based interfaces. A user can simply walk up to the screen and readily interact with moving targets. Instead of being based on gaze location, Pursuits correlates eye pursuit movements with objects dynamically moving on the interface.
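The core mechanism behind Pursuits, as described in the abstract above, is a correlation between the gaze trajectory and each moving target's trajectory over a short time window. The following is a minimal, hypothetical Python sketch of that idea; the windowing, the Pearson-correlation measure, and the 0.8 threshold are illustrative assumptions, not the authors' implementation.

```python
# Minimal sketch of the Pursuits idea (hypothetical data and thresholds):
# correlate recent gaze samples with each on-screen target's trajectory and
# select the target whose motion the eyes follow most closely.
import numpy as np

def pursuit_correlation(gaze_xy, target_xy):
    """Mean Pearson correlation of the x and y components over a time window."""
    rx = np.corrcoef(gaze_xy[:, 0], target_xy[:, 0])[0, 1]
    ry = np.corrcoef(gaze_xy[:, 1], target_xy[:, 1])[0, 1]
    return (rx + ry) / 2.0

def select_target(gaze_xy, trajectories, threshold=0.8):
    """Return the index of the best-matching target, or None if below threshold."""
    scores = [pursuit_correlation(gaze_xy, t) for t in trajectories]
    best = int(np.argmax(scores))
    return best if scores[best] >= threshold else None

# Usage with synthetic data: two circular targets, gaze noisily following target 0.
t = np.linspace(0, 2 * np.pi, 60)
target0 = np.column_stack([np.cos(t), np.sin(t)])
target1 = np.column_stack([np.cos(t + np.pi), np.sin(t + np.pi)])
gaze = target0 + 0.05 * np.random.randn(*target0.shape)
print(select_target(gaze, [target0, target1]))  # -> 0
```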
Andreas Bulling; Ulf Blanke; Desney Tan; Jun Rekimoto; Gregory Abowd: Introduction to the Special Issue on Activity Recognition for Interaction. ACM Transactions on Interactive Intelligent Systems (TiiS), 4(4), pp. 16e:1-16e:3, 2015. DOI: 10.1145/2694858. PDF: https://perceptual.mpi-inf.mpg.de/files/2015/02/Bulling_TIIS15.pdf
Abstract: This editorial introduction describes the aims and scope of the ACM Transactions on Interactive Intelligent Systems special issue on Activity Recognition for Interaction. It explains why activity recognition is becoming crucial as part of the cycle of interaction between users and computing systems, and it shows how the five articles selected for this special issue reflect this theme.
Eduardo Velloso; Dominik Schmidt; Jason Alexander; Hans Gellersen; Andreas Bulling: The Feet in HCI: A Survey of Foot-Based Interaction. ACM Computing Surveys, 48(2), pp. 21:1-21:35, 2015. DOI: 10.1145/2816455. PDF: https://perceptual.mpi-inf.mpg.de/files/2015/08/velloso16_csur.pdf
Abstract: Foot-operated computer interfaces have been studied since the inception of Human-Computer Interaction. Thanks to the miniaturisation and decreasing cost of sensing technology, there is increasing interest in exploring this alternative input modality, but no comprehensive overview of its research landscape exists. In this survey, we review the literature on interfaces operated by the lower limbs. We investigate the characteristics of users and how they affect the design of such interfaces. Next, we describe and analyse foot-based research prototypes and commercial systems in terms of how they capture input and provide feedback. We then analyse the interactions between users and systems from the perspective of the actions performed in these interactions. Finally, we discuss our findings and use them to identify open questions and directions for future research.
conference papers
Yusuke Sugano; Andreas Bulling: Self-Calibrating Head-Mounted Eye Trackers Using Egocentric Visual Saliency. In: Proc. of the 28th ACM Symposium on User Interface Software and Technology (UIST 2015), pp. 363-372, 2015. DOI: 10.1145/2807442.2807445. PDF: https://perceptual.mpi-inf.mpg.de/files/2015/08/Sugano_UIST15.pdf Video: https://www.youtube.com/watch?v=CvsZ3YCWFPk
Abstract: Head-mounted eye tracking has significant potential for gaze-based applications such as life logging, mental health monitoring, or quantified self. However, a neglected challenge for such applications is that drift in the initial person-specific eye tracker calibration, for example caused by physical activity, can severely impact gaze estimation accuracy and, thus, system performance and user experience. We first analyse calibration drift on a new dataset of natural gaze data recorded using synchronised video-based and Electrooculography-based eye trackers of 20 users performing everyday activities in a mobile setting. Based on this analysis we present a method to automatically self-calibrate head-mounted eye trackers based on a computational model of bottom-up visual saliency. Through evaluations on the dataset we show that our method is 1) effective in reducing calibration drift in calibrated eye trackers and 2) given sufficient data, can achieve competitive gaze estimation accuracy to a calibrated eye tracker without any manual calibration.
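As a rough illustration of the self-calibration idea sketched in the abstract (weakly supervising the gaze mapping with bottom-up visual saliency), the snippet below treats the saliency maximum of each scene-camera frame as a proxy gaze target and fits a simple polynomial regression. This is a strong simplification, not the paper's formulation; the function names, the degree-2 polynomial, and the inputs `pupil_xy` and `scene_frames` are assumptions, and it requires opencv-contrib-python and scikit-learn.

```python
# Much-simplified sketch of saliency-based self-calibration (hypothetical).
import cv2
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

def saliency_targets(scene_frames):
    """Return the (x, y) saliency maximum of each scene-camera frame."""
    sal = cv2.saliency.StaticSaliencySpectralResidual_create()
    targets = []
    for frame in scene_frames:
        ok, sal_map = sal.computeSaliency(frame)
        y, x = np.unravel_index(np.argmax(sal_map), sal_map.shape)
        targets.append((x, y))
    return np.array(targets, dtype=float)

def fit_calibration(pupil_xy, scene_frames):
    """Fit a polynomial mapping from pupil positions to saliency maxima."""
    model = make_pipeline(PolynomialFeatures(degree=2), LinearRegression())
    model.fit(pupil_xy, saliency_targets(scene_frames))
    return model  # model.predict(pupil_xy) gives estimated gaze in scene pixels
```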
Florian Alt; Andreas Bulling; Gino Gravanis; Daniel Buschek: GravitySpot: Guiding Users in Front of Public Displays Using On-Screen Visual Cues. In: Proc. of the 28th ACM Symposium on User Interface Software and Technology (UIST 2015), pp. 47-56, 2015. DOI: 10.1145/2807442.2807490. PDF: https://perceptual.mpi-inf.mpg.de/files/2015/08/Alt_UIST15.pdf Video: https://www.youtube.com/watch?v=laWfbOpQQ8A
Abstract: Users tend to position themselves in front of interactive public displays in such a way as to best perceive its content. Currently, this sweet spot is implicitly defined by display properties, content, the input modality, as well as space constraints in front of the display. We present GravitySpot – an approach that makes sweet spots flexible by actively guiding users to arbitrary target positions in front of displays using visual cues. Such guidance is beneficial, for example, if a particular input technology only works at a specific distance or if users should be guided towards a non-crowded area of a large display. In two controlled lab studies (n=29) we evaluate different visual cues based on color, shape, and motion, as well as position-to-cue mapping functions. We show that both the visual cues and mapping functions allow for fine-grained control over positioning speed and accuracy. Findings are complemented by observations from a 3-month real-world deployment.
Christian Lander; Sven Gehring; Antonio Krüger; Sebastian Boring; Andreas Bulling: GazeProjector: Accurate Gaze Estimation and Seamless Gaze Interaction Across Multiple Displays. In: Proc. of the 28th ACM Symposium on User Interface Software and Technology (UIST 2015), pp. 395-404, 2015. DOI: 10.1145/2807442.2807479. PDF: https://perceptual.mpi-inf.mpg.de/files/2015/08/Lander_UIST15.pdf Video: https://www.youtube.com/watch?v=peuL4WRfrRM
Abstract: Mobile gaze-based interaction with multiple displays may occur from arbitrary positions and orientations. However, maintaining high gaze estimation accuracy in such situations remains a significant challenge. In this paper, we present GazeProjector, a system that combines (1) natural feature tracking on displays to determine the mobile eye tracker’s position relative to a display with (2) accurate point-of-gaze estimation. GazeProjector allows for seamless gaze estimation and interaction on multiple displays of arbitrary sizes independently of the user’s position and orientation to the display. In a user study with 12 participants we compare GazeProjector to established methods (here: visual on-screen markers and a state-of-the-art video-based motion capture system). We show that our approach is robust to varying head poses, orientations, and distances to the display, while still providing high gaze estimation accuracy across multiple displays without re-calibration for each variation. Our system represents an important step towards the vision of pervasive gaze-based interfaces.
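A hedged sketch of the general mechanism the abstract describes: localise the display in the scene-camera frame via natural features, estimate a homography against a reference image of the display content, and transfer the gaze point into display coordinates. This is not the authors' code; the inputs `scene_frame`, `display_image`, and `gaze_xy` are assumptions, and grey-scale 8-bit images are expected.

```python
# Hypothetical feature-matching + homography transfer of a gaze point (OpenCV).
import cv2
import numpy as np

def gaze_to_display(scene_frame, display_image, gaze_xy):
    orb = cv2.ORB_create(2000)
    k1, d1 = orb.detectAndCompute(scene_frame, None)
    k2, d2 = orb.detectAndCompute(display_image, None)
    matches = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True).match(d1, d2)
    matches = sorted(matches, key=lambda m: m.distance)[:200]
    src = np.float32([k1[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
    dst = np.float32([k2[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)
    H, _ = cv2.findHomography(src, dst, cv2.RANSAC, 5.0)
    pt = np.float32([[gaze_xy]])                  # shape (1, 1, 2)
    return cv2.perspectiveTransform(pt, H)[0, 0]  # gaze in display-image pixels
```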
Augusto Esteves; Eduardo Velloso; Andreas Bulling; Hans Gellersen: Orbits: Gaze Interaction in Smart Watches using Moving Targets. In: Proc. of the 28th ACM Symposium on User Interface Software and Technology (UIST 2015), pp. 457-466, 2015 (best paper award). DOI: 10.1145/2807442.2807499. PDF: https://perceptual.mpi-inf.mpg.de/files/2015/09/Esteves_UIST15.pdf Video: https://www.youtube.com/watch?v=KEIgw5A0yfI Press: http://www.wired.co.uk/news/archive/2016-01/22/eye-tracking-smartwatch
Abstract: We introduce Orbits, a novel gaze interaction technique that enables hands-free input on smart watches. The technique relies on moving controls to leverage the smooth pursuit movements of the eyes and detect whether, and at which control, the user is looking. In Orbits, controls include targets that move in a circular trajectory on the face of the watch, and can be selected by following the desired one for a small amount of time. We conducted two user studies to assess the technique’s recognition and robustness, which demonstrated how Orbits is robust against false positives triggered by natural eye movements and how it presents a hands-free, high-accuracy way of interacting with smart watches using off-the-shelf devices. Finally, we developed three example interfaces built with Orbits: a music player, a notifications face plate and a missed call menu. Despite relying on moving controls – very unusual in current HCI interfaces – these were generally well received by participants in a third and final study.
Sabrina Hoppe; Tobias Loetscher; Stephanie Morey; Andreas Bulling: Recognition of Curiosity Using Eye Movement Analysis. In: Adj. Proc. of the ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp 2015), pp. 185-188, 2015. DOI: 10.1145/2800835.2800910. PDF: https://perceptual.mpi-inf.mpg.de/files/2015/07/Hoppe_Ubicomp15.pdf Press: http://de.in-mind.org/blog/post/das-fenster-zum-gehirn-was-computer-in-unseren-blicken-lesen
Abstract: Among the different personality traits that guide our behaviour, curiosity is particularly interesting for context-aware assistive systems as it is closely linked to our well-being and the way we learn. This work proposes eye movement analysis for automatic recognition of different levels of curiosity. We present a 26-participant gaze dataset recorded during a real-world shopping task with empirically validated curiosity questionnaires as ground truth. Using a support vector machine classifier and a leave-one-person-out evaluation scheme we can discriminate between two to four classes of standard curiosity scales well above chance. These results are promising and point towards a new class of context-aware systems that take the user's curiosity into account, thereby enabling new types of interaction and user adaptation.
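The evaluation scheme named in the abstract (a support vector machine with leave-one-person-out cross-validation) can be expressed compactly with scikit-learn. The sketch below uses random stand-in features and labels; only the classifier and the splitting strategy follow the abstract.

```python
# Leave-one-person-out SVM evaluation with hypothetical features and labels.
import numpy as np
from sklearn.model_selection import LeaveOneGroupOut, cross_val_score
from sklearn.svm import SVC

rng = np.random.default_rng(0)
X = rng.normal(size=(26 * 20, 12))       # hypothetical eye-movement features
y = rng.integers(0, 2, size=26 * 20)     # hypothetical curiosity class labels
groups = np.repeat(np.arange(26), 20)    # one group id per participant

scores = cross_val_score(SVC(kernel="rbf"), X, y,
                         groups=groups, cv=LeaveOneGroupOut())
print(scores.mean())  # mean accuracy across held-out participants
```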
Mohamed Khamis; Andreas Bulling; Florian Alt: Tackling Challenges of Interactive Public Displays using Gaze. In: Adj. Proc. of the ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp 2015), pp. 763-766, 2015 (2nd Workshop on Challenges and Opportunities in Creating Applications for Pervasive Public Display Networks, PD-Apps 2015). DOI: 10.1145/2800835.2807951. PDF: https://perceptual.mpi-inf.mpg.de/files/2015/07/Khamis15_pdapps.pdf
Abstract: Falling hardware prices led to a widespread use of public displays. Common interaction techniques for such displays currently include touch, mid-air, or smartphone-based interaction. While these techniques are well understood from a technical perspective, several remaining challenges hinder the uptake of interactive displays among passersby. In this paper we propose addressing major public display challenges through gaze as a novel interaction modality. We discuss why gaze-based interaction can tackle these challenges effectively and discuss how solutions can be technically realized. Furthermore, we summarize state-of-the-art eye tracking techniques that show particular promise in the area of public displays.
Mohamed Khamis; Florian Alt; Andreas Bulling: A Field Study on Spontaneous Gaze-based Interaction with a Public Display using Pursuits. In: Adj. Proc. of the ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp 2015), pp. 865-874, 2015. DOI: 10.1145/2800835.2804335. PDF: https://perceptual.mpi-inf.mpg.de/files/2015/07/Khamis15_Ubicomp.pdf
Abstract: Smooth pursuit eye movements were recently introduced as a promising technique for calibration-free and thus spontaneous and natural gaze interaction. While pursuits have been evaluated in controlled laboratory studies, the technique has not yet been evaluated with respect to usability in the wild. We report on a field study in which we deployed a game on a public display where participants used pursuits to select fish moving in linear and circular trajectories at different speeds. The study ran for two days in a busy computer lab resulting in a total of 56 interactions. Results from our study show that linear trajectories are statistically faster to select via pursuits than circular trajectories. We also found that pursuits is well perceived by users who find it fast and responsive.
Andreas Bulling: Human Visual Behaviour for Collaborative Human-Machine Interaction. In: Adj. Proc. of the ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp 2015), pp. 903-907, 2015. DOI: 10.1145/2800835.2815378. PDF: https://perceptual.mpi-inf.mpg.de/files/2015/11/p901-bulling.pdf
Abstract: Non-verbal behavioural cues are fundamental to human communication and interaction. Despite significant advances in recent years, state-of-the-art human-machine systems still fall short in sensing, analysing, and fully "understanding" cues naturally expressed in everyday settings. Two of the most important non-verbal cues, as evidenced by a large body of work in experimental psychology and behavioural sciences, are visual (gaze) behaviour and body language. We envision a new class of collaborative human-machine systems that fully exploit the information content available in non-verbal human behaviour in everyday settings through joint analysis of human gaze and physical behaviour.
Augusto Esteves; Eduardo Velloso; Andreas Bulling; Hans Gellersen: Orbits: Enabling Gaze Interaction in Smart Watches using Moving Targets. In: Adj. Proc. of the ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp 2015), pp. 419-422, 2015. DOI: 10.1145/2800835.2800942. PDF: https://perceptual.mpi-inf.mpg.de/files/2015/08/Esteves15_UbiComp.pdf Press: http://www.wired.co.uk/news/archive/2016-01/22/eye-tracking-smartwatch
Arif Khan; Ingmar Steiner; Ross Macdonald; Yusuke Sugano; Andreas Bulling: Scene viewing and gaze analysis during phonetic segmentation tasks. In: Proc. of the 18th European Conference on Eye Movements (ECEM 2015), 2015. PDF: https://perceptual.mpi-inf.mpg.de/files/2015/08/Khan15_ECEM.pdf
Philipp Müller; Sikandar Amin; Prateek Verma; Mykhaylo Andriluka; Andreas Bulling: Emotion recognition from embedded bodily expressions and speech during dyadic interactions. In: Proc. of the 6th International Conference on Affective Computing and Intelligent Interaction (ACII), pp. 663-669, 2015. DOI: 10.1109/ACII.2015.7344640. PDF: https://perceptual.mpi-inf.mpg.de/files/2015/07/Mueller15_ACII.pdf
Abstract: Previous work on emotion recognition from bodily expressions focused on analysing such expressions in isolation, of individuals or in controlled settings, from a single camera view, or required intrusive motion tracking equipment. We study the problem of emotion recognition from bodily expressions and speech during dyadic (person-person) interactions in a real kitchen instrumented with ambient cameras and microphones. We specifically focus on bodily expressions that are embedded in regular interactions and background activities and recorded without human augmentation to increase naturalness of the expressions. We present a human-validated dataset that contains 224 high-resolution, multi-view video clips and audio recordings of emotionally charged interactions between eight couples of actors. The dataset is fully annotated with categorical labels for four basic emotions (anger, happiness, sadness, and surprise) and continuous labels for valence, activation, power, and anticipation provided by five annotators for each actor. We evaluate vision and audio-based emotion recognition using dense trajectories and a standard audio pipeline and provide insights into the importance of different body parts and audio features for emotion recognition.
Julian Steil; Andreas Bulling: Discovery of Everyday Human Activities From Long-Term Visual Behaviour Using Topic Models. In: Proc. of the 2015 ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp 2015), pp. 75-85, 2015. DOI: 10.1145/2750858.2807520. PDF: https://perceptual.mpi-inf.mpg.de/files/2015/08/Steil_Ubicomp15.pdf Dataset: https://perceptual.mpi-inf.mpg.de/research/datasets/#steil15_ubicomp
Abstract: Human visual behaviour has significant potential for activity recognition and computational behaviour analysis, but previous works focused on supervised methods and recognition of predefined activity classes based on short-term eye movement recordings. We propose a fully unsupervised method to discover users' everyday activities from their long-term visual behaviour. Our method combines a bag-of-words representation of visual behaviour that encodes saccades, fixations, and blinks with a latent Dirichlet allocation (LDA) topic model. We further propose different methods to encode saccades for their use in the topic model. We evaluate our method on a novel long-term gaze dataset that contains full-day recordings of natural visual behaviour of 10 participants (more than 80 hours in total). We also provide annotations for eight sample activity classes (outdoor, social interaction, focused work, travel, reading, computer work, watching media, eating) and periods with no specific activity. We show the ability of our method to discover these activities with performance competitive with that of previously published supervised methods.
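The pipeline named in the abstract (a bag-of-words representation of encoded eye movements fed into an LDA topic model) maps directly onto standard scikit-learn components. The sketch below assumes the eye movements have already been discretised into string "words"; that encoding is the paper's contribution and is not reproduced here.

```python
# Bag-of-words over hypothetical eye-movement "words", then LDA topic discovery.
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer

# One document per time window, each a space-separated string of movement words.
windows = ["sacL sacL fix blink sacR", "fix fix sacU blink", "sacR fix sacR sacL"]

bow = CountVectorizer().fit_transform(windows)  # bag-of-words counts
lda = LatentDirichletAllocation(n_components=2, random_state=0).fit(bow)
print(lda.transform(bow))  # per-window topic mixtures ~ discovered "activities"
```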
Florian Alt; Stefan Schneegass; Alireza Sahami; Mariam Hassib; Andreas Bulling: Graphical Passwords in the Wild – Understanding How Users Choose Pictures and Passwords in Image-based Authentication Schemes. In: Proc. of the 17th International Conference on Human-Computer Interaction with Mobile Devices and Services (MobileHCI 2015), pp. 316-322, 2015. DOI: 10.1145/2785830.2785882. PDF: https://perceptual.mpi-inf.mpg.de/files/2015/07/Alt_MobileHCI15.pdf
Abstract: Common user authentication methods on smartphones, such as lock patterns, PINs, or passwords, impose a trade-off between security and password memorability. Image-based passwords were proposed as a secure and usable alternative. As of today, however, it remains unclear how such schemes are used in the wild. We present the first study to investigate how image-based passwords are used over long periods of time in the real world. Our analyses are based on data from 2318 unique devices collected over more than one year using a custom application released in the Android Play store. We present an in-depth analysis of what kind of images users select, how they define their passwords, and how secure these passwords are. Our findings provide valuable insights into real-world use of image-based passwords and inform the design of future graphical authentication schemes.
Nikolina Koleva; Sabrina Hoppe; Mohammed Mehdi Moniri; Maria Staudte; Andreas Bulling: On the interplay between spontaneous spoken instructions and human visual behaviour in an indoor guidance task. In: Proc. of the 37th Annual Meeting of the Cognitive Science Society, 2015. PDF: https://perceptual.mpi-inf.mpg.de/files/2015/05/koleva_cogsci15.pdf
Abstract: We report on an indoor guidance study to explore the interplay between spontaneous spoken instructions and listeners’ eye movement behaviour. The study involves a remote speaker (the instructor) verbally guiding a listener (the walker) to complete nine everyday tasks in different locations inside a room. We collect a multi-modal dataset of 12 pairs of users consisting of egocentric videos from the listener’s perspective, their gaze data, and instructors’ verbal instructions. We analyse the impact on instructions and listener gaze when the speaker can see 1) only the egocentric video, 2) the video and the point of gaze, or 3) the video and gaze with artificial noise. Our results show that gaze behaviour varies significantly after (but hardly before) instructions and that speakers give more negative feedback when listener gaze is available. These findings suggest that although speakers use gaze information as an indication of what referent the listener is effectively considering, this does not lead listeners to deliberately use their gaze as a pointer even when this is potentially beneficial for the task.
Hosnieh Sattar; Sabine Müller; Mario Fritz; Andreas Bulling: Prediction of Search Targets From Fixations in Open-World Settings. In: Proc. of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2015), pp. 981-990, 2015. DOI: 10.1109/CVPR.2015.7298700. PDF: https://perceptual.mpi-inf.mpg.de/files/2015/04/sattar15_cvpr.pdf Dataset: https://perceptual.mpi-inf.mpg.de/research/datasets/#sattar15_cvpr
Abstract: Previous work on predicting the target of visual search from human fixations only considered closed-world settings in which training labels are available and predictions are performed for a known set of potential targets. In this work we go beyond the state of the art by studying search target prediction in an open-world setting in which we no longer assume that we have fixation data to train for the search targets. We present a dataset containing fixation data of 18 users searching for natural images from three image categories within synthesised image collages of about 80 images. In a closed-world baseline experiment we show that we can predict the correct target image out of a candidate set of five images. We then present a new problem formulation for search target prediction in the open-world setting that is based on learning compatibilities between fixations and potential targets.
Xucong Zhang; Yusuke Sugano; Mario Fritz; Andreas Bulling: Appearance-Based Gaze Estimation in the Wild. In: Proc. of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2015), pp. 4511-4520, 2015. DOI: 10.1109/CVPR.2015.7299081. PDF: https://perceptual.mpi-inf.mpg.de/files/2015/04/zhang_CVPR15.pdf Video: https://www.youtube.com/watch?v=rw6LZA1USG8 Dataset: https://perceptual.mpi-inf.mpg.de/research/datasets/#zhang15_cvpr
Abstract: Appearance-based gaze estimation is believed to work well in real-world settings but existing datasets were collected under controlled laboratory conditions and methods were not evaluated across multiple datasets. In this work we study appearance-based gaze estimation in the wild. We present the MPIIGaze dataset that contains 213,659 images we collected from 15 participants during natural everyday laptop use over more than three months. Our dataset is significantly more variable than existing datasets with respect to appearance and illumination. We also present a method for in-the-wild appearance-based gaze estimation using multimodal convolutional neural networks, which significantly outperforms state-of-the-art methods in the most challenging cross-dataset evaluation setting. We present an extensive evaluation of several state-of-the-art image-based gaze estimation algorithms on three current datasets, including our own. This evaluation provides clear insights and allows us to identify key research challenges of gaze estimation in the wild.
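For orientation, the sketch below shows one way a multimodal convolutional network of the kind described in the abstract can be wired up in PyTorch: a small CNN over a grey-scale eye image whose features are concatenated with the 2D head-pose angles before regressing the 2D gaze angles. The layer sizes and the 36x60 input resolution are illustrative assumptions, not necessarily the paper's exact architecture.

```python
# Illustrative multimodal gaze regression network (eye image + head pose).
import torch
import torch.nn as nn

class GazeNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(                      # 36x60 grey eye image
            nn.Conv2d(1, 20, 5), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(20, 50, 5), nn.ReLU(), nn.MaxPool2d(2),
        )
        self.fc = nn.Sequential(nn.Linear(50 * 6 * 12, 500), nn.ReLU())
        self.head = nn.Linear(500 + 2, 2)               # + head pose -> gaze angles

    def forward(self, eye_image, head_pose):
        x = self.conv(eye_image).flatten(1)
        x = self.fc(x)
        return self.head(torch.cat([x, head_pose], dim=1))

net = GazeNet()
out = net(torch.zeros(4, 1, 36, 60), torch.zeros(4, 2))  # -> shape (4, 2)
```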
Eduardo Velloso; Jayson Turner; Jason Alexander; Andreas Bulling; Hans Gellersen: An Empirical Investigation of Gaze Selection in Mid-Air Gestural 3D Manipulation. In: Proc. of the 15th IFIP TC13 Conference on Human-Computer Interaction (INTERACT 2015), pp. 315-330, 2015. DOI: 10.1007/978-3-319-22668-2_25. PDF: https://perceptual.mpi-inf.mpg.de/files/2015/03/Velloso_Interact15a.pdf
Abstract: In this work, we investigate gaze selection in the context of mid-air hand gestural manipulation of 3D rigid bodies in monoscopic displays. We present the results of a user study with 12 participants in which we compared the performance of Gaze, a Raycasting technique (2D Cursor) and a Virtual Hand technique (3D Cursor) to select objects in two 3D mid-air interaction tasks. Also, we compared selection confirmation times for Gaze selection when selection is followed by manipulation to when it is not. Our results show that gaze selection is faster than and preferred over 2D and 3D mid-air-controlled cursors, and is particularly well suited for tasks in which users constantly switch between several objects during the manipulation. Further, selection confirmation times are longer when selection is followed by manipulation than when it is not.
![]() | Eduardo Velloso; Jason Alexander; Andreas Bulling; Hans Gellersen Interactions Under the Desk: A Characterisation of Foot Movements for Input in a Seated Position Inproceedings Proc. of the 15th IFIP TC13 Conference on Human-Computer Interaction (INTERACT 2015), pp. 384-401, 2015. @inproceedings{Velloso_Interact15b, title = {Interactions Under the Desk: A Characterisation of Foot Movements for Input in a Seated Position}, author = {Eduardo Velloso and Jason Alexander and Andreas Bulling and Hans Gellersen}, url = {https://perceptual.mpi-inf.mpg.de/files/2015/03/Velloso_Interact15b.pdf}, doi = {10.1007/978-3-319-22701-6_29}, year = {2015}, date = {2015-03-01}, booktitle = {Proc. of the 15th IFIP TC13 Conference on Human-Computer Interaction (INTERACT 2015)}, journal = {Proc. of the 15th IFIP TC13 Conference on Human-Computer Interaction (INTERACT 2015)}, pages = {384-401}, abstract = {This paper takes a bottom-up approach to characterising foot movements as input for users seated at computing systems. We conducted four user studies to characterise various aspects of foot-based interaction. First, we built unconstrained foot pointing performance models for 16 participants in a seated desktop setting using 1D and 2D ISO 9241-9-compliant Fitts’s Law tasks. Second, we evaluated the effect of the foot and direction in one-direction tasks, finding no effect of the foot used, but a significant effect of the direction in which targets are distributed. Third, we compared the use of one foot against two feet to control two independent variables, finding that while one foot is better suited for tasks with a spatial representation that matches its movement, there is little difference between the two feet techniques when it does not. Fourth, we analysed the overhead caused by introducing a feet-controlled variable in a mouse-based task, finding the feet to be comparable to the scroll wheel. The results of our studies show the feet are an effective method of enhancing our interaction with desktop systems; we use our findings to inform a series of design guidelines for such systems.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } This paper takes a bottom-up approach to characterising foot movements as input for users seated at computing systems. We conducted four user studies to characterise various aspects of foot-based interaction. First, we built unconstrained foot pointing performance models for 16 participants in a seated desktop setting using 1D and 2D ISO 9241-9-compliant Fitts’s Law tasks. Second, we evaluated the effect of the foot and direction in one-direction tasks, finding no effect of the foot used, but a significant effect of the direction in which targets are distributed. Third, we compared the use of one foot against two feet to control two independent variables, finding that while one foot is better suited for tasks with a spatial representation that matches its movement, there is little difference between the two feet techniques when it does not. Fourth, we analysed the overhead caused by introducing a feet-controlled variable in a mouse-based task, finding the feet to be comparable to the scroll wheel. The results of our studies show the feet are an effective method of enhancing our interaction with desktop systems; we use our findings to inform a series of design guidelines for such systems. |
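The ISO 9241-9-compliant Fitts's Law modelling mentioned in this abstract boils down to relating an index of difficulty, computed from target distance and width, to movement time. The sketch below shows those standard quantities; it is a generic illustration, not the study's exact analysis (which may, for example, apply effective-width corrections).

```python
# Sketch of the standard ISO 9241-9 style Fitts's Law quantities.
# Generic analysis only; the study's exact pipeline is not described in the abstract.
import math

def index_of_difficulty(distance: float, width: float) -> float:
    """Shannon formulation: ID = log2(D / W + 1), in bits."""
    return math.log2(distance / width + 1)

def throughput(distance: float, width: float, movement_time_s: float) -> float:
    """Throughput in bits/s for one condition."""
    return index_of_difficulty(distance, width) / movement_time_s

# Example: 300 px movement to a 40 px target completed in 1.2 s
print(round(index_of_difficulty(300, 40), 2))    # ~3.09 bits
print(round(throughput(300, 40, 1.2), 2))        # ~2.57 bits/s
```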
![]() | Mélodie Vidal; Remi Bismuth; Andreas Bulling; Hans Gellersen The Royal Corgi: Exploring Social Gaze Interaction for Immersive Gameplay Inproceedings Proc. of the 33rd ACM SIGCHI Conference on Human Factors in Computing Systems (CHI 2015), pp. 115-124, 2015. @inproceedings{Vidal_CHI15, title = {The Royal Corgi: Exploring Social Gaze Interaction for Immersive Gameplay}, author = {Mélodie Vidal and Remi Bismuth and Andreas Bulling and Hans Gellersen}, url = {https://perceptual.mpi-inf.mpg.de/files/2015/01/Vidal_CHI15.pdf}, doi = {10.1145/2702123.2702163}, year = {2015}, date = {2015-01-11}, booktitle = {Proc. of the 33rd ACM SIGCHI Conference on Human Factors in Computing Systems (CHI 2015)}, journal = {Proc. of the 33rd ACM SIGCHI Conference on Human Factors in Computing Systems (CHI 2015)}, pages = {115-124}, abstract = {The eyes are a rich channel for non-verbal communication in our daily interactions. We propose social gaze interaction as a game mechanic to enhance user interactions with virtual characters. We develop a game from the ground-up in which characters are designed to be reactive to the player’s gaze in social ways, such as getting annoyed when the player seems distracted or changing their dialogue depending on the player’s apparent focus of attention. Results from a qualitative user study provide insights about how social gaze interaction is intuitive for users, elicits deep feelings of immersion, and highlight the players’ self-consciousness of their own eye movements through their strong reactions to the characters.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } The eyes are a rich channel for non-verbal communication in our daily interactions. We propose social gaze interaction as a game mechanic to enhance user interactions with virtual characters. We develop a game from the ground-up in which characters are designed to be reactive to the player’s gaze in social ways, such as getting annoyed when the player seems distracted or changing their dialogue depending on the player’s apparent focus of attention. Results from a qualitative user study provide insights about how social gaze interaction is intuitive for users, elicits deep feelings of immersion, and highlight the players’ self-consciousness of their own eye movements through their strong reactions to the characters. |
![]() | Jayson Turner; Jason Alexander; Andreas Bulling; Hans Gellersen Gaze+RST: Integrating Gaze and Multitouch for Remote Rotate-Scale-Translate Tasks Inproceedings Proc. of the 33rd ACM SIGCHI Conference on Human Factors in Computing Systems (CHI 2015), pp. 4179-4188, 2015. @inproceedings{Turner_CHI15, title = {Gaze+RST: Integrating Gaze and Multitouch for Remote Rotate-Scale-Translate Tasks}, author = {Jayson Turner and Jason Alexander and Andreas Bulling and Hans Gellersen}, url = {https://perceptual.mpi-inf.mpg.de/files/2015/01/turner_chi15.pdf}, doi = {10.1145/2702123.2702355}, year = {2015}, date = {2015-01-11}, booktitle = {Proc. of the 33rd ACM SIGCHI Conference on Human Factors in Computing Systems (CHI 2015)}, journal = {Proc. of the 33rd ACM SIGCHI Conference on Human Factors in Computing Systems (CHI 2015)}, pages = {4179-4188}, abstract = {Our work investigates the use of gaze and multitouch to fluidly perform rotate-scale-translate (RST) tasks on large displays. The work specifically aims to understand if gaze can provide benefit in such a task, how task complexity affects performance, and how gaze and multitouch can be combined to create an integral input structure suited to the task of RST. We present four techniques that individually strike a different balance between gaze-based and touch-based translation while maintaining concurrent rotation and scaling operations. A 16 participant empirical evaluation revealed that three of our four techniques present viable options for this scenario, and that larger distances and rotation/scaling operations can significantly affect a gaze-based translation configuration. Furthermore we uncover new insights regarding multimodal integrality, finding that gaze and touch can be combined into configurations that pertain to integral or separable input structures.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Our work investigates the use of gaze and multitouch to fluidly perform rotate-scale-translate (RST) tasks on large displays. The work specifically aims to understand if gaze can provide benefit in such a task, how task complexity affects performance, and how gaze and multitouch can be combined to create an integral input structure suited to the task of RST. We present four techniques that individually strike a different balance between gaze-based and touch-based translation while maintaining concurrent rotation and scaling operations. A 16 participant empirical evaluation revealed that three of our four techniques present viable options for this scenario, and that larger distances and rotation/scaling operations can significantly affect a gaze-based translation configuration. Furthermore we uncover new insights regarding multimodal integrality, finding that gaze and touch can be combined into configurations that pertain to integral or separable input structures. |
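The rotate-scale-translate manipulation these techniques operate on can be expressed as a similarity transform recovered from two touch points before and after a drag, with gaze optionally supplying the translation anchor. The following generic sketch (not the paper's implementation) recovers that transform with complex arithmetic.

```python
# Generic two-finger RST (rotate-scale-translate) recovery, sketched here to
# illustrate the kind of transform the techniques manipulate; this is not the
# paper's implementation.
import cmath

def rst_from_two_points(p1, p2, q1, q2):
    """Return (scale, rotation_rad, translation) mapping segment p1-p2 onto q1-q2."""
    p1, p2, q1, q2 = complex(*p1), complex(*p2), complex(*q1), complex(*q2)
    z = (q2 - q1) / (p2 - p1)           # combined rotation + scale
    t = q1 - z * p1                     # translation that completes the mapping
    return abs(z), cmath.phase(z), (t.real, t.imag)

# Example: the two fingers move apart and rotate 90 degrees around the origin
scale, rot, trans = rst_from_two_points((0, 0), (1, 0), (0, 0), (0, 2))
print(scale, rot, trans)  # 2.0, ~1.5708 (pi/2), (0.0, 0.0)
```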
![]() | Robert Walter; Andreas Bulling; David Lindlbauer; Martin Schuessler; Jörg Müller Analyzing Visual Attention During Whole Body Interaction with Public Displays Inproceedings Proc. of the 2015 ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp 2015), pp. 1263-1267, 2015. @inproceedings{Walter_Ubicomp15, title = {Analyzing Visual Attention During Whole Body Interaction with Public Displays}, author = {Robert Walter and Andreas Bulling and David Lindlbauer and Martin Schuessler and Jörg Müller}, url = {https://perceptual.mpi-inf.mpg.de/files/2015/07/Walter_Ubicomp15.pdf https://www.youtube.com/watch?v=JlEnUyhQ1cY}, doi = {10.1145/2750858.280425}, year = {2015}, date = {2015-01-01}, booktitle = {Proc. of the 2015 ACM International Joint Conference on Pervasive and Ubiquitous Computing (UbiComp 2015)}, pages = {1263-1267}, abstract = {While whole body interaction can enrich user experience on public displays, it remains unclear how common visualizations of user representations impact users' ability to perceive content on the display. In this work we use a head-mounted eye tracker to record visual behavior of 25 users interacting with a public display game that uses a silhouette user representation, mirroring the users' movements. Results from visual attention analysis as well as post-hoc recall and recognition tasks on display contents reveal that visual attention is mostly on users' silhouette while peripheral screen elements remain largely unattended. In our experiment, content attached to the user representation attracted significantly more attention than other screen contents, while content placed at the top and bottom of the screen attracted significantly less. Screen contents attached to the user representation were also significantly better remembered than those at the top and bottom of the screen.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } While whole body interaction can enrich user experience on public displays, it remains unclear how common visualizations of user representations impact users' ability to perceive content on the display. In this work we use a head-mounted eye tracker to record visual behavior of 25 users interacting with a public display game that uses a silhouette user representation, mirroring the users' movements. Results from visual attention analysis as well as post-hoc recall and recognition tasks on display contents reveal that visual attention is mostly on users' silhouette while peripheral screen elements remain largely unattended. In our experiment, content attached to the user representation attracted significantly more attention than other screen contents, while content placed at the top and bottom of the screen attracted significantly less. Screen contents attached to the user representation were also significantly better remembered than those at the top and bottom of the screen. |
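Attention analyses of this kind typically aggregate fixations into dwell time per area of interest (AOI). The sketch below shows that aggregation step; the AOI names and rectangles are hypothetical placeholders, since the study's actual AOI definitions and tooling are not given in the abstract.

```python
# Sketch: aggregate fixation durations into dwell time per area of interest.
# AOI names and rectangles are hypothetical placeholders.
from collections import defaultdict

AOIS = {                      # (x_min, y_min, x_max, y_max) in screen pixels
    "silhouette": (600, 200, 1320, 880),
    "top_banner": (0, 0, 1920, 150),
    "bottom_bar": (0, 930, 1920, 1080),
}

def dwell_times(fixations):
    """fixations: iterable of (x, y, duration_ms); returns dwell time in ms per AOI."""
    totals = defaultdict(float)
    for x, y, dur in fixations:
        for name, (x0, y0, x1, y1) in AOIS.items():
            if x0 <= x <= x1 and y0 <= y <= y1:
                totals[name] += dur
                break
    return dict(totals)

print(dwell_times([(900, 500, 250), (100, 50, 180), (950, 520, 300)]))
# {'silhouette': 550.0, 'top_banner': 180.0}
```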
![]() | Erroll Wood; Tadas Baltrusaitis; Xucong Zhang; Yusuke Sugano; Peter Robinson; Andreas Bulling Rendering of Eyes for Eye-Shape Registration and Gaze Estimation Inproceedings Proc. of the IEEE International Conference on Computer Vision (ICCV 2015), pp. 3756-3764, 2015. @inproceedings{wood2015_iccv, title = {Rendering of Eyes for Eye-Shape Registration and Gaze Estimation}, author = {Erroll Wood and Tadas Baltrusaitis and Xucong Zhang and Yusuke Sugano and Peter Robinson and Andreas Bulling}, url = {https://perceptual.mpi-inf.mpg.de/wp-content/blogs.dir/12/files/2016/06/wood2015_iccv.pdf http://www.technologyreview.com/view/537891/virtual-eyes-train-deep-learning-algorithm-to-recognize-gaze-direction/ http://www.cl.cam.ac.uk/research/rainbow/projects/syntheseyes/}, doi = {10.1109/ICCV.2015.428}, year = {2015}, date = {2015-01-01}, booktitle = {Proc. of the IEEE International Conference on Computer Vision (ICCV 2015)}, pages = {3756-3764}, abstract = {Images of the eye are key in several computer vision problems, such as shape registration and gaze estimation. Recent large-scale supervised methods for these problems require time-consuming data collection and manual annotation, which can be unreliable. We propose synthesizing perfectly labelled photo-realistic training data in a fraction of the time. We used computer graphics techniques to build a collection of dynamic eye-region models from head scan geometry. These were randomly posed to synthesize close-up eye images for a wide range of head poses, gaze directions, and illumination conditions. We used our model's controllability to verify the importance of realistic illumination and shape variations in eye-region training data. Finally, we demonstrate the benefits of our synthesized training data (SynthesEyes) by out-performing state-of-the-art methods for eye-shape registration as well as cross-dataset appearance-based gaze estimation in the wild.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Images of the eye are key in several computer vision problems, such as shape registration and gaze estimation. Recent large-scale supervised methods for these problems require time-consuming data collection and manual annotation, which can be unreliable. We propose synthesizing perfectly labelled photo-realistic training data in a fraction of the time. We used computer graphics techniques to build a collection of dynamic eye-region models from head scan geometry. These were randomly posed to synthesize close-up eye images for a wide range of head poses, gaze directions, and illumination conditions. We used our model's controllability to verify the importance of realistic illumination and shape variations in eye-region training data. Finally, we demonstrate the benefits of our synthesized training data (SynthesEyes) by out-performing state-of-the-art methods for eye-shape registration as well as cross-dataset appearance-based gaze estimation in the wild. |
technical reports
![]() | Iaroslav Shcherbatyi; Andreas Bulling; Mario Fritz GazeDPM: Early Integration of Gaze Information in Deformable Part Models Technical Report arXiv:1505.05753, 2015. @techreport{Shcherbatyi15_arxiv, title = {GazeDPM: Early Integration of Gaze Information in Deformable Part Models}, author = {Iaroslav Shcherbatyi and Andreas Bulling and Mario Fritz}, url = {http://arxiv.org/abs/1505.05753 https://perceptual.mpi-inf.mpg.de/files/2015/05/Shcherbatyi15_arxiv.pdf}, year = {2015}, date = {2015-05-21}, abstract = {An increasing number of works explore collaborative human-computer systems in which human gaze is used to enhance computer vision systems. For object detection these efforts were so far restricted to late integration approaches that have inherent limitations, such as increased precision without increase in recall. We propose an early integration approach in a deformable part model, which constitutes a joint formulation over gaze and visual data. We show that our GazeDPM method improves over the state-of-the-art DPM baseline by 4% and a recent method for gaze-supported object detection by 3% on the public POET dataset. Our approach additionally provides introspection of the learnt models, can reveal salient image structures, and allows us to investigate the interplay between gaze attracting and repelling areas, the importance of view-specific models, as well as viewers' personal biases in gaze patterns. We finally study important practical aspects of our approach, such as the impact of using saliency maps instead of real fixations, the impact of the number of fixations, as well as robustness to gaze estimation error. }, type = {arXiv:1505.05753}, keywords = {}, pubstate = {published}, tppubtype = {techreport} } An increasing number of works explore collaborative human-computer systems in which human gaze is used to enhance computer vision systems. For object detection these efforts were so far restricted to late integration approaches that have inherent limitations, such as increased precision without increase in recall. We propose an early integration approach in a deformable part model, which constitutes a joint formulation over gaze and visual data. We show that our GazeDPM method improves over the state-of-the-art DPM baseline by 4% and a recent method for gaze-supported object detection by 3% on the public POET dataset. Our approach additionally provides introspection of the learnt models, can reveal salient image structures, and allows us to investigate the interplay between gaze attracting and repelling areas, the importance of view-specific models, as well as viewers' personal biases in gaze patterns. We finally study important practical aspects of our approach, such as the impact of using saliency maps instead of real fixations, the impact of the number of fixations, as well as robustness to gaze estimation error. |
![]() | Michael Barz; Andreas Bulling; Florian Daiber Computational Modelling and Prediction of Gaze Estimation Error for Head-mounted Eye Trackers Technical Report German Research Center for Artificial Intelligence (DFKI), 2015. @techreport{Barz_Rep15, title = {Computational Modelling and Prediction of Gaze Estimation Error for Head-mounted Eye Trackers}, author = {Michael Barz and Andreas Bulling and Florian Daiber }, url = {http://www.dfki.de/web/forschung/publikationen?pubid=7619 https://perceptual.mpi-inf.mpg.de/files/2016/01/Barz16_techRep.pdf}, year = {2015}, date = {2015-01-01}, volume = {1}, pages = {10}, institution = {German Research Center for Artificial Intelligence (DFKI)}, series = {DFKI Research Reports, RR}, abstract = {Head-mounted eye tracking has significant potential for mobile gaze-based interaction with ambient displays but current interfaces lack information about the tracker's gaze estimation error. Consequently, current interfaces do not exploit the full potential of gaze input as the inherent estimation error can not be dealt with. The error depends on the physical properties of the display and constantly varies with changes in position and distance of the user to the display. In this work we present a computational model of gaze estimation error for head-mounted eye trackers. Our model covers the full processing pipeline for mobile gaze estimation, namely mapping of pupil positions to scene camera coordinates, marker-based display detection, and display mapping. We build the model based on a series of controlled measurements of a sample state-of-the-art monocular head-mounted eye tracker. Results show that our model can predict gaze estimation error with a root mean squared error of 17.99~px ($1.96^\circ$). }, keywords = {}, pubstate = {published}, tppubtype = {techreport} } Head-mounted eye tracking has significant potential for mobile gaze-based interaction with ambient displays but current interfaces lack information about the tracker's gaze estimation error. Consequently, current interfaces do not exploit the full potential of gaze input as the inherent estimation error can not be dealt with. The error depends on the physical properties of the display and constantly varies with changes in position and distance of the user to the display. In this work we present a computational model of gaze estimation error for head-mounted eye trackers. Our model covers the full processing pipeline for mobile gaze estimation, namely mapping of pupil positions to scene camera coordinates, marker-based display detection, and display mapping. We build the model based on a series of controlled measurements of a sample state-of-the-art monocular head-mounted eye tracker. Results show that our model can predict gaze estimation error with a root mean squared error of 17.99 px (1.96°). |
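The reported error combines a root mean squared error in display pixels with its angular equivalent at the eye, a conversion that depends on the display's pixel pitch and the viewing distance. The sketch below illustrates both computations with made-up geometry values; the report's own measurement setup is not specified in the abstract.

```python
# Sketch: root mean squared gaze error in pixels and its angular equivalent.
# Pixel pitch and viewing distance below are made-up example values.
import math

def rmse(errors_px):
    return math.sqrt(sum(e * e for e in errors_px) / len(errors_px))

def px_to_degrees(error_px, pixel_pitch_mm=0.27, viewing_distance_mm=500.0):
    """Convert an on-screen error in pixels to visual angle in degrees."""
    return math.degrees(math.atan((error_px * pixel_pitch_mm) / viewing_distance_mm))

errs = [12.0, 20.0, 21.0]                   # example per-sample errors in pixels
print(round(rmse(errs), 2))                 # 18.12
print(round(px_to_degrees(rmse(errs)), 2))  # ~0.56 degrees with these example values
```

Under the geometry of the actual setup, the reported 17.99 px corresponds to 1.96°; the example values above only illustrate the conversion.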
![]() | Xucong Zhang; Yusuke Sugano; Mario Fritz; Andreas Bulling Appearance-Based Gaze Estimation in the Wild Technical Report arXiv:1504.02863, 2015. @techreport{zhang2015appearance, title = {Appearance-Based Gaze Estimation in the Wild}, author = { Xucong Zhang and Yusuke Sugano and Mario Fritz and Andreas Bulling}, url = {http://arxiv.org/abs/1504.02863 https://perceptual.mpi-inf.mpg.de/files/2015/04/Zhang15_arxiv.pdf}, year = {2015}, date = {2015-01-01}, journal = {arXiv preprint arXiv:1504.02863}, abstract = {Appearance-based gaze estimation is believed to work well in real-world settings, but existing datasets have been collected under controlled laboratory conditions and methods have not been evaluated across multiple datasets. In this work we study appearance-based gaze estimation in the wild. We present the MPIIGaze dataset that contains 213,659 images we collected from 15 participants during natural everyday laptop use over more than three months. Our dataset is significantly more variable than existing ones with respect to appearance and illumination. We also present a method for in-the-wild appearance-based gaze estimation using multimodal convolutional neural networks that significantly outperforms state-of-the-art methods in the most challenging cross-dataset evaluation. We present an extensive evaluation of several state-of-the-art image-based gaze estimation algorithms on three current datasets, including our own. This evaluation provides clear insights and allows us to identify key research challenges of gaze estimation in the wild. }, type = {arXiv:1504.02863}, keywords = {}, pubstate = {published}, tppubtype = {techreport} } Appearance-based gaze estimation is believed to work well in real-world settings, but existing datasets have been collected under controlled laboratory conditions and methods have not been evaluated across multiple datasets. In this work we study appearance-based gaze estimation in the wild. We present the MPIIGaze dataset that contains 213,659 images we collected from 15 participants during natural everyday laptop use over more than three months. Our dataset is significantly more variable than existing ones with respect to appearance and illumination. We also present a method for in-the-wild appearance-based gaze estimation using multimodal convolutional neural networks that significantly outperforms state-of-the-art methods in the most challenging cross-dataset evaluation. We present an extensive evaluation of several state-of-the-art image-based gaze estimation algorithms on three current datasets, including our own. This evaluation provides clear insights and allows us to identify key research challenges of gaze estimation in the wild. |
![]() | Erroll Wood; Tadas Baltrusaitis; Xucong Zhang; Yusuke Sugano; Peter Robinson; Andreas Bulling Rendering of Eyes for Eye-Shape Registration and Gaze Estimation Technical Report arXiv:1505.05916, 2015. @techreport{wood2015rendering, title = {Rendering of Eyes for Eye-Shape Registration and Gaze Estimation}, author = { Erroll Wood and Tadas Baltrusaitis and Xucong Zhang and Yusuke Sugano and Peter Robinson and Andreas Bulling}, url = {http://arxiv.org/abs/1505.05916 https://perceptual.mpi-inf.mpg.de/files/2015/05/Wood_arxiv15.pdf}, year = {2015}, date = {2015-01-01}, journal = {arXiv preprint arXiv:1505.05916}, abstract = {Images of the eye are key in several computer vision problems, such as shape registration and gaze estimation. Recent large-scale supervised methods for these problems require time-consuming data collection and manual annotation, which can be unreliable. We propose synthesizing perfectly labelled photo-realistic training data in a fraction of the time. We used computer graphics techniques to build a collection of dynamic eye-region models from head scan geometry. These were randomly posed to synthesize close-up eye images for a wide range of head poses, gaze directions, and illumination conditions. We used our model's controllability to verify the importance of realistic illumination and shape variations in eye-region training data. Finally, we demonstrate the benefits of our synthesized training data (SynthesEyes) by out-performing state-of-the-art methods for eye-shape registration as well as cross-dataset appearance-based gaze estimation in the wild. }, type = {arXiv:1505.05916}, keywords = {}, pubstate = {published}, tppubtype = {techreport} } Images of the eye are key in several computer vision problems, such as shape registration and gaze estimation. Recent large-scale supervised methods for these problems require time-consuming data collection and manual annotation, which can be unreliable. We propose synthesizing perfectly labelled photo-realistic training data in a fraction of the time. We used computer graphics techniques to build a collection of dynamic eye-region models from head scan geometry. These were randomly posed to synthesize close-up eye images for a wide range of head poses, gaze directions, and illumination conditions. We used our model's controllability to verify the importance of realistic illumination and shape variations in eye-region training data. Finally, we demonstrate the benefits of our synthesized training data (SynthesEyes) by out-performing state-of-the-art methods for eye-shape registration as well as cross-dataset appearance-based gaze estimation in the wild. |
![]() | Hosnieh Sattar; Sabine Müller; Mario Fritz; Andreas Bulling Prediction of search targets from fixations in open-world settings Technical Report arXiv:1502.05137, 2015. @techreport{sattar2015prediction, title = {Prediction of search targets from fixations in open-world settings}, author = {Hosnieh Sattar and Sabine Müller and Mario Fritz and Andreas Bulling}, url = {http://arxiv.org/abs/1502.05137 https://perceptual.mpi-inf.mpg.de/files/2015/04/sattar15_arxiv.pdf}, year = {2015}, date = {2015-01-01}, journal = {arXiv preprint arXiv:1502.05137}, abstract = {Previous work on predicting the target of visual search from human fixations only considered closed-world settings in which training labels are available and predictions are performed for a known set of potential targets. In this work we go beyond the state of the art by studying search target prediction in an open-world setting in which we no longer assume that we have fixation data to train for the search targets. We present a dataset containing fixation data of 18 users searching for natural images from three image categories within synthesised image collages of about 80 images. In a closed-world baseline experiment we show that we can predict the correct target image out of a candidate set of five images. We then present a new problem formulation for search target prediction in the open-world setting that is based on learning compatibilities between fixations and potential targets. }, type = {arXiv:1502.05137}, keywords = {}, pubstate = {published}, tppubtype = {techreport} } Previous work on predicting the target of visual search from human fixations only considered closed-world settings in which training labels are available and predictions are performed for a known set of potential targets. In this work we go beyond the state of the art by studying search target prediction in an open-world setting in which we no longer assume that we have fixation data to train for the search targets. We present a dataset containing fixation data of 18 users searching for natural images from three image categories within synthesised image collages of about 80 images. In a closed-world baseline experiment we show that we can predict the correct target image out of a candidate set of five images. We then present a new problem formulation for search target prediction in the open-world setting that is based on learning compatibilities between fixations and potential targets. |
![]() | Christian Lander; Sven Gehring; Antonio Krüger; Sebastian Boring; Andreas Bulling GazeProjector: Location-independent Gaze Interaction on and Across Multiple Displays Technical Report German Research Center for Artificial Intelligence (DFKI), 2015. @techreport{lander15_techrep, title = {GazeProjector: Location-independent Gaze Interaction on and Across Multiple Displays }, author = {Christian Lander and Sven Gehring and Antonio Krüger and Sebastian Boring and Andreas Bulling}, url = {https://perceptual.mpi-inf.mpg.de/files/2015/01/DFKI_Techreport1.pdf}, year = {2015}, date = {2015-01-01}, institution = {German Research Center for Artificial Intelligence (DFKI) }, abstract = {Mobile gaze-based interaction with multiple displays may occur from arbitrary positions and orientations. However, maintaining high gaze estimation accuracy still represents a significant challenge. To address this, we present GazeProjector, a system that combines accurate point-of-gaze estimation with natural feature tracking on displays to determine the mobile eye tracker’s position relative to a display. The detected eye positions are transformed onto that display allowing for gaze-based interaction. This allows for seamless gaze estimation and interaction on (1) multiple displays of arbitrary sizes, (2) independently of the user’s position and orientation to the display. In a user study with 12 participants we compared GazeProjector to existing well-established methods such as visual on-screen markers and a state-of-the-art motion capture system. Our results show that our approach is robust to varying head poses, orientations, and distances to the display, while still providing high gaze estimation accuracy across multiple displays without re-calibration. The system represents an important step towards the vision of pervasive gaze-based interfaces.}, keywords = {}, pubstate = {published}, tppubtype = {techreport} } Mobile gaze-based interaction with multiple displays may occur from arbitrary positions and orientations. However, maintaining high gaze estimation accuracy still represents a significant challenge. To address this, we present GazeProjector, a system that combines accurate point-of-gaze estimation with natural feature tracking on displays to determine the mobile eye tracker’s position relative to a display. The detected eye positions are transformed onto that display allowing for gaze-based interaction. This allows for seamless gaze estimation and interaction on (1) multiple displays of arbitrary sizes, (2) independently of the user’s position and orientation to the display. In a user study with 12 participants we compared GazeProjector to existing well-established methods such as visual on-screen markers and a state-of-the-art motion capture system. Our results show that our approach is robust to varying head poses, orientations, and distances to the display, while still providing high gaze estimation accuracy across multiple displays without re-calibration. The system represents an important step towards the vision of pervasive gaze-based interfaces. |
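The core mapping step described here, estimating the transform between the scene camera and a display from matched natural features and re-projecting the gaze point, can be sketched with a homography as below. The feature correspondences and variable names are simplified placeholders, not GazeProjector's actual code.

```python
# Sketch: map a gaze point from the scene-camera image onto display coordinates
# via a homography estimated from matched natural features. Simplified
# placeholder for the kind of mapping GazeProjector performs; not its code.
import numpy as np
import cv2

# Hypothetical matched feature locations: scene-camera pixels vs. display pixels
scene_pts = np.array([[100, 80], [520, 90], [510, 400], [110, 390]], dtype=np.float32)
display_pts = np.array([[0, 0], [1920, 0], [1920, 1080], [0, 1080]], dtype=np.float32)

# Robustly estimate the homography between the two coordinate frames
H, _ = cv2.findHomography(scene_pts, display_pts, cv2.RANSAC)

# Gaze estimate in scene-camera coordinates (e.g. from the head-mounted tracker)
gaze_scene = np.array([[[300.0, 240.0]]], dtype=np.float32)

# Re-project the gaze point into display coordinates
gaze_display = cv2.perspectiveTransform(gaze_scene, H)
print(gaze_display.ravel())  # approximate on-display gaze position in pixels
```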