diff options
| -rw-r--r-- | scrapersources/postliste-oslo-kommune-byraadsavdelingene | 16 | 
1 files changed, 10 insertions, 6 deletions
| diff --git a/scrapersources/postliste-oslo-kommune-byraadsavdelingene b/scrapersources/postliste-oslo-kommune-byraadsavdelingene index 8523e8b..b54d182 100644 --- a/scrapersources/postliste-oslo-kommune-byraadsavdelingene +++ b/scrapersources/postliste-oslo-kommune-byraadsavdelingene @@ -18,6 +18,7 @@ import re  import resource  import dateutil.parser  import datetime +import sys  from dateutil.relativedelta import relativedelta  # Some example URLs @@ -134,13 +135,16 @@ def fetch_day(parser, day):  #            print count, dayurl              if 0 == count:  #                print "Ending day at offset %d" % offset -                return totalcount +                break              offset = offset + offsetstep          scraperwiki.sqlite.save(unique_keys=['arkivsaksref'], data=datastore)          datastore = [] +        return totalcount      except scraperwiki.CPUTimeExceededError, e:          print "error: Ran out of time, abort scraping" -        pass +        # Not saving, to avoid saving partial day.  Better to scrape +        # the entire day the next run. +        return 0      except Exception, e:  #        print html          print e @@ -172,18 +176,18 @@ for n in xrange(skiplimit+1):      day = newest + aday * n  #    print day      totalcount = totalcount + fetch_day(parser, day) -    if cpu_spent() > cpu_available() + 5: +    if cpu_spent() > (cpu_available() - 3):          print "Running short on CPU time, exiting" -        os.exit(0) +        sys.exit(0)  # Scan backwards, one day before the oldest entry in the database  for n in xrange(skiplimit):      day = oldest - aday * (n+1)  #    print day      totalcount = totalcount + fetch_day(parser, day) -    if cpu_spent() > cpu_available() + 5: +    if cpu_spent() > (cpu_available() - 3):          print "Running short on CPU time, exiting" -        os.exit(0) +        sys.exit(0)  print "Fetched %d journal entries" % totalcount | 
