# Default group: applies to every crawler that is not matched by a more
# specific User-agent group further down.
User-agent: *
# Explicit "allow everything" rule. This group must contain at least one rule:
# parsers that follow RFC 9309 treat consecutive User-agent lines with no rule
# between them as ONE group, which would apply the gsa-crawler restrictions
# below to every crawler. Keep this line even if the options below stay
# commented out.
Disallow:
# Uncomment the following line ONLY if sitemaps.org or HTML sitemaps are used
# and you have verified that your site is being indexed correctly.
# Disallow: /browse
# You also may wish to disallow access to the following paths, in order
# to stop web spiders from accessing user-based content:
# Disallow: /advanced-search
# Disallow: /contact
# Disallow: /feedback
# Disallow: /forgot
# Disallow: /login
# Disallow: /register
# Disallow: /search

# custom config for the VUW-GSA machine
# =====================================
# these are the 'standard' disallows from above + the collection homepages,
# this should show only the item pages.
# NOTE: no blank lines inside this group - some older parsers treat a blank
# line as the end of the record.
User-agent: gsa-crawler
Disallow: /advanced-search
# BF - 07/02/2013 - disallow advanced search for collection
Disallow: /handle/*advanced-search
Disallow: /contact
Disallow: /feedback
Disallow: /forgot
Disallow: /login
Disallow: /register
Disallow: /search
# BF - 20/12/2012 - Removed disallow on handle paths eg /handle/10063/1
# BF - 20/12/2012 - Added disallow on all bitstream content (i.e. PDFs, images, etc.)
Disallow: /bitstream/handle/
Disallow: /*/statistics$
Disallow: /*full$
Disallow: /browse
# BF 07/02/2013 - disallow browse by collection
Disallow: /handle/*browse
Disallow: /feed