[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: Splitting search results from a "find -print0"
From: |
Markus Elfring |
Subject: |
Re: Splitting search results from a "find -print0" |
Date: |
Fri, 09 Jan 2015 20:53:23 +0100 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Thunderbird/31.3.0 |
>> Would you like to recommend any other software tools
>> which provide the desired functionality for splitting
>> of files which contain zero-terminated text lines?
>
> If you don't mind a bit of glue work, you can achieve the same results
> with existing tools:
>
> tr '\n\0' '\0\n' < orig > mod
> split mod mod.
> for each f in mod.*; do
> tr '\n\0' '\0\n' < $f > orig.${f##*.}
> done
I have experimented a bit more with the Python programming language.
Would you like to try out the following approach?
import sys
def _terminated_records(data, delimiter=b"\0"):
    """Cut *data* into records, each one keeping its trailing delimiter.

    The delimiter is treated as a record terminator (the format produced by
    ``find -print0``), so input that ends with the delimiter does not yield
    a spurious empty record.  If the input does not end with the delimiter,
    the final record is returned as-is, without one.  Concatenating the
    returned records always reproduces *data* exactly.
    """
    pieces = data.split(delimiter)
    # Whatever follows the last delimiter: empty for properly terminated
    # input, otherwise an unterminated final record.
    tail = pieces.pop()
    records = [piece + delimiter for piece in pieces]
    if tail:
        records.append(tail)
    return records


def split(number, dir):
    """Distribute NUL-terminated records from standard input over files.

    All of standard input is read as bytes and cut into records that end
    with a NUL byte.  The records are written, in input order, to files
    inside the directory *dir*:

    * ``number == 1``: the input is copied unchanged to ``x1.part``.  (The
      one-based name of this single file is kept for compatibility.)
    * fewer records than ``number``: record *i* goes to ``x<i>.part``; the
      remaining files up to ``x<number-1>.part`` are created empty.
    * otherwise: every file receives ``count // number`` consecutive
      records and ``x0.part`` additionally receives the remainder.

    Every record is written together with its terminator, so each output
    file is itself a valid NUL-terminated list and the concatenation of
    ``x0.part`` .. ``x<number-1>.part`` equals the input.

    Parameters:
        number: how many output files to produce; must be at least 1.
        dir: output directory; it is made absolute with
            ``os.path.abspath``, so an empty string means the current
            working directory.

    Raises:
        ValueError: if *number* is smaller than 1.
        SystemExit: if *number* is greater than 1 and the input is empty.
        OSError: if an output file cannot be opened or written.
    """
    import os

    if number < 1:
        raise ValueError("number must be at least 1")

    # closefd=False keeps descriptor 0 open after reading; a plain
    # io.FileIO(fd) would close standard input when it is discarded.
    with open(sys.stdin.fileno(), "rb", closefd=False) as stream:
        data = stream.read()

    target = os.path.abspath(dir)

    def write_part(label, records):
        # One output file per call; an empty record list gives an empty file.
        with open(os.path.join(target, "x%d.part" % label), "wb") as output:
            output.write(b"".join(records))

    if number == 1:
        write_part(1, [data])
        return

    # The data is bytes, so it must be tested as bytes (a comparison with
    # the str "" is never true on Python 3).
    if not data:
        sys.exit("The standard input was empty.")

    records = _terminated_records(data)
    count = len(records)

    if count < number:
        # Not enough records for every file: one record per file, and the
        # slice is empty for the files beyond the last record.
        for index in range(number):
            write_part(index, records[index:index + 1])
        return

    portion, remainder = divmod(count, number)
    end = portion + remainder  # the first file absorbs the remainder
    write_part(0, records[:end])
    for index in range(1, number):
        start, end = end, end + portion
        write_part(index, records[start:end])
def main():
    """Parse the command line and run split() on standard input.

    Recognised options:
        -g N, --groups=N        number of output files (default 2, minimum 1)
        -o DIR, --output-dir=DIR
                                directory for the output files (default:
                                empty string, passed on to split() as-is)

    Positional arguments are accepted by the parser but not used.

    Every usage problem ends the program through sys.exit() with a message:
    an unusable standard input, an option getopt rejects, a --groups value
    that is not an integer or is below 1, or an empty --output-dir value.
    """
    import getopt

    if sys.__stdin__ is None:
        sys.exit("The standard input channel was not usable.")

    try:
        options, _ignored = getopt.gnu_getopt(
            sys.argv[1:], "g:o:", ["groups=", "output-dir="]
        )
    except getopt.GetoptError as ex:
        sys.exit(str(ex))

    output_dir = ""
    groups = 2
    for parameter, value in options:
        if parameter in ("-g", "--groups"):
            # A non-numeric value is a usage error, not a crash: report it
            # the same way as an out-of-range number.
            try:
                groups = int(value)
            except ValueError:
                sys.exit("invalid parameter value")
            if groups < 1:
                sys.exit("invalid parameter value")
        elif parameter in ("-o", "--output-dir"):
            if not value:
                sys.exit("invalid specification for an output directory")
            output_dir = value
        else:
            sys.exit(": ".join(["unhandled option", parameter]))

    split(groups, output_dir)


# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
Regards,
Markus
- Re: Splitting search results from a "find -print0", (continued)
- Re: Splitting search results from a "find -print0", Pádraig Brady, 2015/01/08
- Re: Splitting search results from a "find -print0", Pádraig Brady, 2015/01/08
- Re: Splitting search results from a "find -print0", Assaf Gordon, 2015/01/08
- Re: Splitting search results from a "find -print0", Pádraig Brady, 2015/01/08
- RE: Splitting search results from a "find -print0", Cook, Malcolm, 2015/01/09
- Re: Splitting search results from a "find -print0", Bernhard Voelker, 2015/01/09
- Re: Splitting search results from a "find -print0", Pádraig Brady, 2015/01/09
- Re: Splitting search results from a "find -print0", Bernhard Voelker, 2015/01/10
- Re: Splitting search results from a "find -print0", Pádraig Brady, 2015/01/10
- Re: Splitting search results from a "find -print0", Bernhard Voelker, 2015/01/10
Re: Splitting search results from a "find -print0",
Markus Elfring <=