diff -up urlgrabber-3.10/test/test_mirror.py.orig urlgrabber-3.10/test/test_mirror.py
--- urlgrabber-3.10/test/test_mirror.py.orig	2013-08-26 09:09:07.000000000 +0200
+++ urlgrabber-3.10/test/test_mirror.py	2016-06-29 18:26:06.790393129 +0200
@@ -268,33 +268,55 @@ class ActionTests(TestCase):
         self.assertEquals(self.g.calls, expected_calls)
         self.assertEquals(urlgrabber.mirror.DEBUG.logs, expected_logs)
 
+import thread, socket
+LOCALPORT = 'localhost', 2000
+
 class HttpReplyCode(TestCase):
     def setUp(self):
+        # start the server
+        self.exit = False
         def server():
-            import socket
             s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
             s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
-            s.bind(('localhost', 2000)); s.listen(1)
+            s.bind(LOCALPORT); s.listen(1)
             while 1:
                 c, a = s.accept()
+                if self.exit: c.close(); break
                 while not c.recv(4096).endswith('\r\n\r\n'): pass
                 c.sendall('HTTP/1.1 %d %s\r\n' % self.reply)
+                if self.content is not None:
+                    c.sendall('Content-Length: %d\r\n\r\n' % len(self.content))
+                    c.sendall(self.content)
                 c.close()
-        import thread
-        self.reply = 503, "Busy"
+            s.close()
+            self.exit = False
         thread.start_new_thread(server, ())
 
+        # create grabber and mirror group objects
         def failure(obj):
             self.code = getattr(obj.exception, 'code', None)
             return {}
         self.g = URLGrabber()
-        self.mg = MirrorGroup(self.g, ['http://localhost:2000/'], failure_callback = failure)
+        self.mg = MirrorGroup(self.g, ['http://%s:%d' % LOCALPORT],
+                              failure_callback = failure)
+
+    def tearDown(self):
+        # shut down the server
+        self.exit = True
+        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        s.connect(LOCALPORT); s.close() # wake it up
+        while self.exit: pass # poor man's join
 
     def test_grab(self):
+        'tests the propagation of HTTP reply code'
+        self.reply = 503, "Busy"
+        self.content = None
+
+        # single
         self.assertRaises(URLGrabError, self.mg.urlgrab, 'foo')
         self.assertEquals(self.code, 503); del self.code
 
+        # multi
         err = []
         self.mg.urlgrab('foo', async = True, failfunc = err.append)
         urlgrabber.grabber.parallel_wait()
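
The test above drives MirrorGroup against a throwaway HTTP stub. For reference,
a minimal client-side sketch of the same failure_callback contract the test
exercises (names are illustrative; the stub from the test is assumed to be
listening on localhost:2000):

    from urlgrabber.grabber import URLGrabber, URLGrabError
    from urlgrabber.mirror import MirrorGroup

    codes = []
    def failure(obj):
        # obj.exception is the URLGrabError raised for this mirror;
        # its 'code' attribute carries the HTTP status (e.g. 503)
        codes.append(getattr(obj.exception, 'code', None))
        return {}  # empty action dict: let MirrorGroup take its default action

    g = URLGrabber()
    mg = MirrorGroup(g, ['http://localhost:2000/'], failure_callback=failure)
    try:
        mg.urlgrab('foo')
    except URLGrabError:
        pass  # every mirror failed; codes now holds the observed statuses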
diff -up urlgrabber-3.10/test/test_mirror.py.orig urlgrabber-3.10/test/test_mirror.py
--- urlgrabber-3.10/test/test_mirror.py.orig	2016-06-29 18:26:06.790393129 +0200
+++ urlgrabber-3.10/test/test_mirror.py	2016-06-29 18:26:58.886148544 +0200
@@ -268,13 +268,14 @@ class ActionTests(TestCase):
         self.assertEquals(self.g.calls, expected_calls)
         self.assertEquals(urlgrabber.mirror.DEBUG.logs, expected_logs)
 
-import thread, socket
+import threading, socket
 LOCALPORT = 'localhost', 2000
 
 class HttpReplyCode(TestCase):
     def setUp(self):
         # start the server
         self.exit = False
+        self.process = lambda data: None
         def server():
             s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
             s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
@@ -282,7 +283,10 @@ class HttpReplyCode(TestCase):
             while 1:
                 c, a = s.accept()
                 if self.exit: c.close(); break
-                while not c.recv(4096).endswith('\r\n\r\n'): pass
+                data = ''
+                while not data.endswith('\r\n\r\n'):
+                    data = c.recv(4096)
+                self.process(data)
                 c.sendall('HTTP/1.1 %d %s\r\n' % self.reply)
                 if self.content is not None:
                     c.sendall('Content-Length: %d\r\n\r\n' % len(self.content))
@@ -290,7 +294,8 @@ class HttpReplyCode(TestCase):
                 c.close()
             s.close()
             self.exit = False
-        thread.start_new_thread(server, ())
+        self.thread = threading.Thread(target=server)
+        self.thread.start()
 
         # create grabber and mirror group objects
         def failure(obj):
@@ -305,7 +310,7 @@ class HttpReplyCode(TestCase):
         self.exit = True
         s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
         s.connect(LOCALPORT); s.close() # wake it up
-        while self.exit: pass # poor man's join
+        self.thread.join()
 
     def test_grab(self):
         'tests the propagation of HTTP reply code'
@@ -323,6 +328,45 @@ class HttpReplyCode(TestCase):
         self.assertEquals([e.exception.errno for e in err], [256])
         self.assertEquals(self.code, 503); del self.code
 
+    def test_retry_no_cache(self):
+        'test bypassing proxy cache on failure'
+        def process(data):
+            if 'Pragma:no-cache' in data:
+                self.content = 'version2'
+            else:
+                self.content = 'version1'
+
+        def checkfunc_read(obj):
+            if obj.data == 'version1':
+                raise URLGrabError(-1, 'Outdated version of foo')
+
+        def checkfunc_grab(obj):
+            with open('foo') as f:
+                if f.read() == 'version1':
+                    raise URLGrabError(-1, 'Outdated version of foo')
+
+        self.process = process
+        self.reply = 200, "OK"
+
+        opts = self.g.opts
+        opts.retry = 3
+        opts.retry_no_cache = True
+
+        # single
+        opts.checkfunc = checkfunc_read
+        try:
+            self.mg.urlread('foo')
+        except URLGrabError as e:
+            self.fail(str(e))
+
+        # multi
+        opts.checkfunc = checkfunc_grab
+        self.mg.urlgrab('foo', async=True)
+        try:
+            urlgrabber.grabber.parallel_wait()
+        except URLGrabError as e:
+            self.fail(str(e))
+
 def suite():
     tl = TestLoader()
     return tl.loadTestsFromModule(sys.modules[__name__])
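
test_retry_no_cache relies on the checkfunc contract: raising URLGrabError
with a negative errno marks the attempt as failed, and with retry_no_cache
enabled the grabber adds Pragma:no-cache on the remaining retries, so a
transparent proxy cannot keep serving the stale copy. A minimal sketch of
such a validator (the 'version2' freshness marker is hypothetical, modeled
on checkfunc_read above):

    from urlgrabber.grabber import URLGrabError

    def checkfunc(obj):
        # obj.data is set for urlread-style calls (cf. checkfunc_read above)
        if obj.data != 'version2':
            raise URLGrabError(-1, 'stale copy, retry bypassing the cache')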
diff -up urlgrabber-3.10/urlgrabber/grabber.py.orig urlgrabber-3.10/urlgrabber/grabber.py
--- urlgrabber-3.10/urlgrabber/grabber.py.orig	2016-06-29 18:25:53.964453346 +0200
+++ urlgrabber-3.10/urlgrabber/grabber.py	2016-06-29 18:26:58.886148544 +0200
@@ -171,6 +171,12 @@ GENERAL ARGUMENTS (kwargs)
     The libproxy code is only used if the proxies dictionary
     does not provide any proxies.
 
+  no_cache = False
+
+    When True, server-side cache will be disabled for http and https
+    requests.  This is equivalent to setting
+    http_headers = (('Pragma', 'no-cache'),)
+
   prefix = None
 
     a url prefix that will be prepended to all requested urls.  For
@@ -383,10 +389,11 @@ RETRY RELATED ARGUMENTS
     identical to checkfunc, except for the attributes defined in the
     CallbackObject instance.  The attributes for failure_callback are:
 
-      exception = the raised exception
-      url       = the url we're trying to fetch
-      tries     = the number of tries so far (including this one)
-      retry     = the value of the retry option
+      exception      = the raised exception
+      url            = the url we're trying to fetch
+      tries          = the number of tries so far (including this one)
+      retry          = the value of the retry option
+      retry_no_cache = the value of the retry_no_cache option
 
     The callback is present primarily to inform the calling program of
     the failure, but if it raises an exception (including the one it's
@@ -431,6 +438,19 @@ RETRY RELATED ARGUMENTS
     passed the same arguments, so you could use the same function for
     both.
 
+  retry_no_cache = False
+
+    When True, automatically enable no_cache for future retries if
+    checkfunc performs an unsuccessful check.
+
+    This option is useful if your application expects a set of files
+    from the same server to form an atomic unit and you write your
+    checkfunc to ensure each file being downloaded belongs to such a
+    unit.  If transparent proxy caching is in effect, the files can
+    become out-of-sync, disrupting the atomicity.  Enabling this option
+    will prevent that, while ensuring that you still enjoy the benefits
+    of caching when possible.
+
 BANDWIDTH THROTTLING
 
   urlgrabber supports throttling via two values: throttle and
@@ -1001,6 +1021,8 @@ class URLGrabberOptions:
         self.half_life = 30*24*60*60 # 30 days
         self.default_speed = 1e6 # 1 MBit
         self.ftp_disable_epsv = False
+        self.no_cache = False
+        self.retry_no_cache = False
 
     def __repr__(self):
         return self.format()
@@ -1077,7 +1099,8 @@ class URLGrabber(object):
             if callback:
                 if DEBUG: DEBUG.info('calling callback: %s', callback)
                 obj = CallbackObject(exception=exception, url=args[0],
-                                     tries=tries, retry=opts.retry)
+                                     tries=tries, retry=opts.retry,
+                                     retry_no_cache=opts.retry_no_cache)
                 _run_callback(callback, obj)
 
             if (opts.retry is None) or (tries == opts.retry):
@@ -1089,6 +1112,8 @@ class URLGrabber(object):
                 if DEBUG: DEBUG.info('retrycode (%i) not in list %s, re-raising',
                                      retrycode, opts.retrycodes)
                 raise
+            if retrycode is not None and retrycode < 0 and opts.retry_no_cache:
+                opts.no_cache = True
 
     def urlopen(self, url, opts=None, **kwargs):
         """open the url and return a file object
@@ -1429,11 +1454,15 @@ class PyCurlFileObject(object):
             self.curl_obj.setopt(pycurl.SSLKEYPASSWD, opts.ssl_key_pass)
 
         #headers:
-        if opts.http_headers and self.scheme in ('http', 'https'):
+        if self.scheme in ('http', 'https'):
             headers = []
-            for (tag, content) in opts.http_headers:
-                headers.append('%s:%s' % (tag, content))
-            self.curl_obj.setopt(pycurl.HTTPHEADER, headers)
+            if opts.http_headers is not None:
+                for (tag, content) in opts.http_headers:
+                    headers.append('%s:%s' % (tag, content))
+            if opts.no_cache:
+                headers.append('Pragma:no-cache')
+            if headers:
+                self.curl_obj.setopt(pycurl.HTTPHEADER, headers)
 
         # ranges:
         if opts.range or opts.reget:
@@ -2055,7 +2084,8 @@ class _ExternalDownloader:
         'ssl_key_pass',
         'ssl_verify_peer', 'ssl_verify_host',
         'size', 'max_header_size', 'ip_resolve',
-        'ftp_disable_epsv'
+        'ftp_disable_epsv',
+        'no_cache',
     )
 
     def start(self, opts):
@@ -2236,6 +2266,8 @@ def parallel_wait(meter=None):
                 except URLGrabError, ug_err:
                     retry = 0 # no retries
             if opts.tries < retry and ug_err.errno in opts.retrycodes:
+                if ug_err.errno < 0 and opts.retry_no_cache:
+                    opts.no_cache = True
                 start(opts, opts.tries + 1) # simple retry
                 continue
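
Once the patch is applied, the new options behave like any other grabber
kwargs. A usage sketch (the URL and the staleness test are illustrative,
not part of the patch):

    import urlgrabber
    from urlgrabber.grabber import URLGrabError

    def checkfunc(obj):
        # hypothetical freshness check on the downloaded bytes
        if 'expected-marker' not in obj.data:
            raise URLGrabError(-1, 'stale content')

    # send Pragma:no-cache on every request:
    data = urlgrabber.urlread('http://example.com/repo/repomd.xml',
                              no_cache=True)

    # or start from the cache and bypass it only after a failed check:
    data = urlgrabber.urlread('http://example.com/repo/repomd.xml',
                              retry=3, retry_no_cache=True,
                              checkfunc=checkfunc)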