From 0b7136f645122445d92d726eaa1a94626aa46576 Mon Sep 17 00:00:00 2001
From: Petter Reinholdtsen <pere@hungry.com>
Date: Sun, 2 Oct 2016 23:02:41 +0200
Subject: Improve handling of limited CPU resources.

---
 scrapersources/postliste-oslo-kommune-byraadsavdelingene | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/scrapersources/postliste-oslo-kommune-byraadsavdelingene b/scrapersources/postliste-oslo-kommune-byraadsavdelingene
index 8523e8b..b54d182 100644
--- a/scrapersources/postliste-oslo-kommune-byraadsavdelingene
+++ b/scrapersources/postliste-oslo-kommune-byraadsavdelingene
@@ -18,6 +18,7 @@ import re
 import resource
 import dateutil.parser
 import datetime
+import sys
 from dateutil.relativedelta import relativedelta
 
 # Some example URLs
@@ -134,13 +135,16 @@ def fetch_day(parser, day):
 #            print count, dayurl
             if 0 == count:
 #                print "Ending day at offset %d" % offset
-                return totalcount
+                break
             offset = offset + offsetstep
         scraperwiki.sqlite.save(unique_keys=['arkivsaksref'], data=datastore)
         datastore = []
+        return totalcount
     except scraperwiki.CPUTimeExceededError, e:
         print "error: Ran out of time, abort scraping"
-        pass
+        # Deliberately not saving here, to avoid storing a partial day.
+        # Better to scrape the entire day on the next run.
+        return 0
     except Exception, e:
 #        print html
         print e
@@ -172,18 +176,18 @@ for n in xrange(skiplimit+1):
     day = newest + aday * n
 #    print day
     totalcount = totalcount + fetch_day(parser, day)
-    if cpu_spent() > cpu_available() + 5:
+    if cpu_spent() > (cpu_available() - 3):
         print "Running short on CPU time, exiting"
-        os.exit(0)
+        sys.exit(0)
 
 # Scan backwards, one day before the oldest entry in the database
 for n in xrange(skiplimit):
     day = oldest - aday * (n+1)
 #    print day
     totalcount = totalcount + fetch_day(parser, day)
-    if cpu_spent() > cpu_available() + 5:
+    if cpu_spent() > (cpu_available() - 3):
         print "Running short on CPU time, exiting"
-        os.exit(0)
+        sys.exit(0)
 
 print "Fetched %d journal entries" % totalcount
 
-- 
cgit v1.2.3