Wednesday, 15 September 2010

python - Reading file from the internet and split into 2 -



python - Reading file from the internet and split into 2 -

I am brand new to Python and am looking for the following: I read a file from the net and want to split it at a certain number of lines. 1. file = line 1 to x; 2. file = line x+1 to EOF.

I use httplib2 to read the file from the net and then want to split this file into 2. I tried it with "with", but it seems that I cannot use f.readline() etc. when reading a file from the net and using it in "with". If I open a local file, it works fine.

Am I missing something here?

Thank you very much for your help in advance.

with data_file as f: # data_file is the file read from the internet

Here is my function:

def create_data_files(data_file):
    """Download the data file and work out where it should be split in two.

    The content is fetched from the module-level ``data_url`` through an
    httplib2 client that caches in the ``.cache`` directory. The ``data_file``
    argument is overwritten by the downloaded text, so its incoming value is
    not used.

    Returns:
        A ``(training_data_file, test_data_file)`` tuple. Both values are
        still the placeholder ``0``: the actual split is not implemented in
        this function.

    Raises:
        httplib2.HttpLib2Error: If the request fails. The error is printed
            and re-raised, because nothing below can run without a response.
    """
    print("reading file net or cache")
    h = httplib2.Http(".cache")
    try:
        # Pass headers={'cache-control': 'no-cache'} to force a download
        # from the net instead of using the cache.
        data_header, data_file = h.request(data_url)
    except httplib2.HttpLib2Error as e:
        print(e)
        # Falling through would only fail later on the unbound data_header
        # and hide the real cause.
        raise
    # h.request() returns the body as bytes; everything below works on text.
    data_file = data_file.decode()

    # Report whether the response came from the local cache or the net.
    print("dataheader", data_header.fromcache)
    if data_header.fromcache:
        print("file read cache")
    else:
        print("file read internet")

    # Split into lines, stripping surrounding whitespace from each one.
    print("counting lines in file")
    single_line = [value.strip() for value in data_file.split("\n")]
    print("total amount of lines in original file", len(single_line))

    # Percentage of all lines that should become training data. The value is
    # hard-coded for testing; the loop is where a prompt for user input
    # (a number between 0 and 100) would go.
    while True:
        split_factor = 70
        print("split factor set 70% test purposes")
        if 0 <= split_factor <= 100:
            break
        print('try again')

    split_number = int(len(single_line) * split_factor / 100)
    print("number of training set data", split_number)

    # TODO: split single_line at split_number; placeholders are returned
    # until that is implemented.
    training_data_file = 0
    test_data_file = 0
    return training_data_file, test_data_file

from collections import deque  # retained from the original snippet; unused below
import httplib2


def create_data_files(data_url, split_factor=0.7):
    """Download a text file and split its lines into two parts.

    Args:
        data_url: URL of the file to download.
        split_factor: Fraction of the lines that goes into the first
            (training) part. The line count times this factor is truncated
            to an integer to get the split position.

    Returns:
        A ``(training_str, test_str)`` tuple of newline-joined strings: the
        first ``int(line_count * split_factor)`` lines, and the remainder.
    """
    h = httplib2.Http()
    resp_headers, content = h.request(data_url, "GET")
    # On Python 3 the body arrives as bytes and must be decoded to text.
    content = content.decode()

    lines = content.split('\n')
    # Slice at the truncated index so exactly that many lines land in the
    # training part; comparing a zero-based counter with `<=` took one more.
    split_index = int(len(lines) * split_factor)
    training_str = '\n'.join(lines[:split_index])
    test_str = '\n'.join(lines[split_index:])
    return training_str, test_str

This should do the trick (not tested, and simplified).

data_header, data_file = h.request(data_url)

data_file is not a file object but a string.

python file split httplib2

No comments:

Post a Comment