summaryrefslogtreecommitdiffstats
path: root/utils/common
diff options
context:
space:
mode:
authorDominik Riebeling <Dominik.Riebeling@gmail.com>2012-02-05 19:17:53 +0100
committerDominik Riebeling <Dominik.Riebeling@gmail.com>2012-02-05 21:26:57 +0100
commit909b96fa70fd14a0fc8d59e1e810f484d30039c6 (patch)
tree3b462e645bf03dedf5ccec253d72667cbaa0b3cd /utils/common
parentdb3afb03a26868b00011e9aab9733643440bc03a (diff)
downloadrockbox-909b96fa70fd14a0fc8d59e1e810f484d30039c6.tar.gz
rockbox-909b96fa70fd14a0fc8d59e1e810f484d30039c6.tar.bz2
rockbox-909b96fa70fd14a0fc8d59e1e810f484d30039c6.zip
Python module to get (some) files from the repository.
With git it's not possible to simply export a set of paths from the server directly. This module offers a similar functionality, but requires a local clone of the repository. It calls git directly for maximum portability. This is a prerequisite to making the deployment script used for Rockbox Utility work again. Change-Id: I1aca8ddd40d16b6268c1d6a2fc23ac59e964c3c6
Diffstat (limited to 'utils/common')
-rwxr-xr-xutils/common/gitscraper.py183
1 files changed, 183 insertions, 0 deletions
diff --git a/utils/common/gitscraper.py b/utils/common/gitscraper.py
new file mode 100755
index 0000000000..49ef42de13
--- /dev/null
+++ b/utils/common/gitscraper.py
@@ -0,0 +1,183 @@
+#!/usr/bin/python
+# __________ __ ___.
+# Open \______ \ ____ ____ | | _\_ |__ _______ ___
+# Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+# Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+# Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+# \/ \/ \/ \/ \/
+#
+# Copyright (c) 2012 Dominik Riebeling
+#
+# All files in this archive are subject to the GNU General Public License.
+# See the file COPYING in the source tree root for full license agreement.
+#
+# This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+# KIND, either express or implied.
+#
+
+'''Scrape files from a git repository.
+
+This module provides functions to get a subset of files from a git repository.
+The files to retrieve can be specified, and the git tree to work on can be
+specified. That way arbitrary trees can be retrieved (like a subset of files
+for a given tag).
+
+Retrieved files can be packaged into a bzip2 compressed tarball or stored in a
+given folder for processing afterwards.
+
+Calls git commands directly for maximum compatibility.
+'''
+
+import re
+import subprocess
+import os
+import tarfile
+import tempfile
+import shutil
+
+
def get_refs(repo):
    '''Get dict mapping refs to hashes from repository pointed to by repo.
    @param repo Path to repository root.
    @return Dict mapping each ref name to its hash. Empty if git failed or
            no refs were listed.
    '''
    print("Getting list of refs")
    # universal_newlines makes git's output text on both Python 2 and 3.
    proc = subprocess.Popen(["git", "show-ref"], stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, cwd=repo,
                            universal_newlines=True)
    stdout, stderr = proc.communicate()
    refs = {}

    # Judge success by exit status: text on stderr alone may just be a warning.
    if proc.returncode != 0:
        # A non-zero exit without any message is not reported as an error;
        # the result is simply an empty dict.
        if stderr:
            print("An error occured!\n")
            print(stderr)
        return refs

    for line in stdout.split('\n'):
        found = re.match(r'([a-f0-9]+)\s+(\S+)', line)
        if found:
            # ref is the key, hash its value.
            refs[found.group(2)] = found.group(1)

    return refs
+
+
def get_lstree(repo, start, filterlist=None):
    '''Get recursive list of tree objects for a given tree.
    @param repo Path to repository root.
    @param start Hash identifying the tree.
    @param filterlist List of path prefixes to retrieve object hashes for.
                      An empty list (or None) will retrieve all paths.
    @return Dict mapping filename to blob hash. Empty if git failed or a
            filename showed up twice.
    '''
    # universal_newlines makes git's output text on both Python 2 and 3.
    proc = subprocess.Popen(["git", "ls-tree", "-r", start],
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                            cwd=repo, universal_newlines=True)
    stdout, stderr = proc.communicate()
    objects = {}

    # Judge success by exit status: text on stderr alone may just be a warning.
    if proc.returncode != 0:
        print("An error occured!\n")
        print(stderr)
        return objects

    prefixes = tuple(filterlist) if filterlist else ()
    for line in stdout.split('\n'):
        # Each entry is "<mode> <type> <hash>", a TAB, then the path. Cutting
        # at the TAB keeps paths that contain spaces intact.
        meta, tab, path = line.partition('\t')
        fields = meta.split()
        if not tab or not path or len(fields) != 3:
            continue
        if prefixes and not path.startswith(prefixes):
            continue
        # If two files have the same content they have the same hash, so
        # the filename has to be used as key.
        if path in objects:
            print("FATAL: key already exists in dict!")
            return {}
        objects[path] = fields[2]
    return objects
+
+
def get_object(repo, blob, destfile):
    '''Get an identified object from the repository.
    @param repo Path to repository root.
    @param blob hash for blob to retrieve.
    @param destfile filename for blob output. Missing parent folders are
                    created.
    @return True if file was successfully written, False on error.
    '''
    # The blob is read as raw bytes (no universal_newlines) so that binary
    # files come through unmodified.
    proc = subprocess.Popen(["git", "cat-file", "-p", blob],
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                            cwd=repo)
    data, errors = proc.communicate()
    # Judge success by exit status: text on stderr alone may just be a warning.
    if proc.returncode != 0:
        if not isinstance(errors, str):
            # Python 3 hands back bytes; decode only for display.
            errors = errors.decode('utf-8', 'replace')
        print("An error occured!\n")
        print(errors)
        return False
    # make sure output path exists; a bare filename has no folder to create.
    destdir = os.path.dirname(destfile)
    if destdir and not os.path.exists(destdir):
        os.makedirs(destdir)
    # Binary mode avoids newline translation corrupting the blob contents.
    with open(destfile, 'wb') as f:
        f.write(data)
    return True
+
+
def describe_treehash(repo, treehash):
    '''Retrieve output of git-describe for a given hash.
    @param repo Path to repository root.
    @param treehash Hash identifying the tree / commit to describe.
    @return Description string, or an empty string if git failed.
    '''
    # universal_newlines makes git's output text on both Python 2 and 3.
    proc = subprocess.Popen(["git", "describe", treehash],
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                            cwd=repo, universal_newlines=True)
    stdout, stderr = proc.communicate()
    # Judge success by exit status: a warning on stderr must not throw away
    # an otherwise valid description.
    if proc.returncode != 0:
        print("An error occured!\n")
        print(stderr)
        return ""
    return stdout.rstrip()
+
+
def scrape_files(repo, treehash, filelist, dest=""):
    '''Scrape list of files from repository.
    @param repo Path to repository root.
    @param treehash Hash identifying the tree.
    @param filelist List of files to get from repository.
    @param dest Destination path for files. Files will get retrieved with full
                path from the repository, and the folder structure will get
                created below dest as necessary. If empty (or None) a new
                temporary folder is created and used.
    @return Destination path.
    '''
    print("Scraping files from repository")

    if not dest:
        dest = tempfile.mkdtemp()
    treeobjects = get_lstree(repo, treehash, filelist)
    for path, blob in treeobjects.items():
        # Name the file that failed instead of dropping the result silently.
        if not get_object(repo, blob, os.path.join(dest, path)):
            print("Failed to retrieve " + path)

    return dest
+
+
def archive_files(repo, treehash, filelist, basename, tmpfolder=""):
    '''Archive list of files into tarball.
    @param repo Path to repository root.
    @param treehash Hash identifying the tree.
    @param filelist List of files to archive. All files in the archive if left
                    empty.
    @param basename Basename (including path) of output file. Will get used as
                    basename inside of the archive as well (i.e. no tarbomb).
    @param tmpfolder Folder to put intermediate files in. If no folder is given
                     a temporary one will get used.
    @return Output filename.
    '''
    print("Archiving files from repository")

    workfolder = scrape_files(repo, treehash, filelist, tmpfolder)
    outfile = basename + ".tar.bz2"
    try:
        tf = tarfile.open(outfile, "w:bz2")
        try:
            # Store everything below a single top-level entry named basename.
            tf.add(workfolder, basename)
        finally:
            # Close the tarball even if adding files failed.
            tf.close()
    finally:
        # Only remove the work folder when it is not the caller's tmpfolder,
        # and do so even if archiving raised.
        if tmpfolder != workfolder:
            shutil.rmtree(workfolder)
    return outfile