Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
7f5d2ec
2015.11.16 update to last tutorial
brian-ho Nov 17, 2015
da1056f
2015.11.17 server code
brian-ho Nov 17, 2015
3086d0b
2015.11.17 commit
brian-ho Nov 17, 2015
99e4217
Update README.md
brian-ho Nov 17, 2015
4dbd72f
Merge pull request #1 from FA15-DMC-KUAILE/server_brian
brian-ho Nov 17, 2015
dbda977
2015.11.17
brian-ho Nov 17, 2015
841fed7
Merge pull request #2 from FA15-DMC-KUAILE/server_brian
brian-ho Nov 17, 2015
42aeb5d
2015.11.17
brian-ho Nov 17, 2015
a390048
Merge pull request #3 from FA15-DMC-KUAILE/server_brian
brian-ho Nov 17, 2015
faf3c7d
2015.11.18 for desk crib with Danil
brian-ho Nov 18, 2015
fce04c5
2015.18 for desk crib 2
brian-ho Nov 18, 2015
a0a2d3e
2015.11.18 last update for desk cries
brian-ho Nov 18, 2015
ea393a4
Merge pull request #4 from FA15-DMC-KUAILE/server_brian
brian-ho Nov 18, 2015
e0eb845
2015.11.18 merged with front-end
brian-ho Nov 18, 2015
6dd61e8
2015.11.18 merged and working
brian-ho Nov 19, 2015
036846d
2015.11.23
brian-ho Nov 24, 2015
4716563
2015.11.24
brian-ho Nov 24, 2015
92fc74c
Client Side with slider
surbhikamboj91 Nov 25, 2015
bafc9da
2015.11.25
brian-ho Nov 25, 2015
d027827
2015.11.26 sync
brian-ho Nov 26, 2015
a88442b
2015.11.26 fixed?
brian-ho Nov 26, 2015
ba087cd
2015.12.09
brian-ho Dec 9, 2015
38b6fe1
heatmap_isha
ishapatel Dec 9, 2015
4ab38ab
heatmap_isha
ishapatel Dec 9, 2015
5f34ef2
Merge remote-tracking branch 'data-mining-the-city/master'
anjalisinghvi Dec 13, 2015
e07d0de
2015.12.12
brian-ho Dec 13, 2015
85fd949
2015.12.14 update
brian-ho Dec 14, 2015
537272c
2015.12.16 updates
brian-ho Dec 16, 2015
473e299
Merge remote-tracking branch 'origin/master'
anjalisinghvi Dec 16, 2015
2d85e7c
2015.12.18 not quite working
brian-ho Dec 18, 2015
a33ad98
Update README.md
brian-ho Dec 20, 2015
67efbf9
2015.12.20
brian-ho Dec 20, 2015
1f33a7f
2015.12.19
brian-ho Dec 21, 2015
c23be6e
2015.12.20 final working
brian-ho Dec 21, 2015
087c335
Update README.md
brian-ho Dec 21, 2015
72efa72
2015.12.20
brian-ho Dec 21, 2015
690d790
Merge pull request #5 from FA15-DMC-KUAILE/brian-working-2
brian-ho Dec 21, 2015
08a312e
2015.12.20
brian-ho Dec 22, 2015
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# Core DMC Web Stack platform
# Team KUAILE Web Stack for Datamining the City, GSAPP Fall 2015

Project contains two branches:
- **soufun** - Implementation using soufun rental listing data set with heat map and interpolation (ML) analysis using overlay grid
- **weibo** - Implementation using weibo social media data set with graph analysis, click events, and animated transitions.
A study of "non-migration" during the Chinese New Year.
Use of this web-stack will require modifying the Weibo database, to add lat/lng to Checkins and CNY value to Users. Python scripts to do so are included.
186 changes: 186 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
#DMC TEAM KUAILE SERVER CODE
#Note: you must pre-process the Weibo dataset to add the CNY value to Users, to add lat and lng values to Checkins

from flask import Flask
from flask import render_template
from flask import request
from flask import Response

import json
import time
import sys
import random
import math
import datetime

import pyorient

from Queue import Queue

from sklearn import preprocessing
from sklearn import svm

import numpy as np

app = Flask(__name__)

# Shared FIFO queue: getData() pushes progress strings onto it and
# event_stream() drains them to the browser as server-sent events.
q = Queue()

def point_distance(x1, y1, x2, y2):
    """Return the Euclidean distance between points (x1, y1) and (x2, y2)."""
    dx = x1 - x2
    dy = y1 - y2
    return math.sqrt(dx * dx + dy * dy)

def remap(value, min1, max1, min2, max2):
    """Linearly map *value* from the range [min1, max1] to [min2, max2]."""
    fraction = (float(value) - float(min1)) / (float(max1) - float(min1))
    return float(min2) + fraction * (float(max2) - float(min2))

#daymaker makes your day
def daymaker(time):
    """Return the day offset (0-28) of a timestamp within the 2014 Chinese
    New Year window, or 0 for timestamps outside it.

    *time* is anything whose str() starts 'YYYY-MM-DD' (pyorient returns
    datetime objects here).  Note the parameter shadows the `time` module
    inside this function (kept for caller compatibility).

    Fixes two defects in the original:
    - `datetime.date(2014, 02, 12)` used a Python-2-only octal literal;
    - the out-of-window branch set dayscore to the int 0 and then returned
      `dayscore.days`, raising AttributeError for any date outside the window.
    """
    text = str(time)
    the_date = datetime.date(int(text[0:4]), int(text[5:7]), int(text[8:10]))

    window_start = datetime.date(2014, 1, 15)
    window_end = datetime.date(2014, 2, 12)

    if window_start <= the_date <= window_end:
        # timedelta.days gives the whole-day offset from the window start
        return (the_date - window_start).days
    return 0

def normalizeArray(inputArray):
    """Rescale every value of a 2-D nested list to [0, 1] in place and
    return the (mutated) list.

    Fixes defects in the original: the hand-rolled scan seeded maxVal = 0
    and minVal = 1e11, which produced wrong ranges for all-negative or
    very large data; a flat array (all values equal) raised
    ZeroDivisionError; now such rows normalize to 0.0.  Empty input is
    returned unchanged.
    """
    values = [v for row in inputArray for v in row]
    if not values:
        return inputArray

    lo = float(min(values))
    hi = float(max(values))
    span = hi - lo

    for row in inputArray:
        for i in range(len(row)):
            # inline remap(v, lo, hi, 0, 1); guard degenerate span
            row[i] = 0.0 if span == 0 else (float(row[i]) - lo) / span

    return inputArray

def event_stream():
    """Generator of server-sent-event frames.

    Blocks on the shared module-level queue `q` and wraps each message in
    the SSE wire format; never terminates on its own.
    """
    while True:
        yield 'data: %s\n\n' % str(q.get())

@app.route('/eventSource/')
def sse_source():
    # Server-Sent Events endpoint: streams the progress messages that
    # getData() pushes onto the module-level queue (via event_stream()).
    return Response(
        event_stream(),
        mimetype='text/event-stream')

@app.route("/")
def index():
    # Serve the single-page client UI.
    return render_template("index.html")

@app.route("/getData/")
def getData():
    """Query the Weibo OrientDB for up to 20 users with CNY = 7 (checkins in
    all three Chinese-New-Year weeks, i.e. "non-migrating" users), collect
    their checkins inside the CNY window, and return a GeoJSON-style
    FeatureCollection (plus a 'polylines' list) as a JSON string.

    Progress messages are pushed onto the module-level queue `q` so the
    /eventSource/ SSE stream can report status to the browser.
    """

    q.put("starting data query...")

    # Bounding box sent by the client.  Currently unused by the checkin
    # query below -- see the commented-out lat/lng filter further down.
    lat1 = str(request.args.get('lat1'))
    lng1 = str(request.args.get('lng1'))
    lat2 = str(request.args.get('lat2'))
    lng2 = str(request.args.get('lng2'))

    print "received coordinates: [" + lat1 + ", " + lat2 + "], [" + lng1 + ", " + lng2 + "]"

    #CAPTURE ANY ADDITIONAL ARGUMENTS SENT FROM THE CLIENT HERE

    # NOTE(review): duplicate of the print above -- harmless debug leftover.
    print "received coordinates: [" + lat1 + ", " + lat2 + "], [" + lng1 + ", " + lng2 + "]"

    # Connect to the local OrientDB server and open the weibo database.
    client = pyorient.OrientDB("localhost", 2424)
    session_id = client.connect("root", "password")
    db_name = "weibo"
    db_username = "admin"
    db_password = "admin"

    if client.db_exists( db_name, pyorient.STORAGE_TYPE_MEMORY ):
        client.db_open( db_name, db_username, db_password )
        print db_name + " opened successfully"
    else:
        # Missing database: abort the whole process (kills the Flask worker).
        print "database [" + db_name + "] does not exist! session ending..."
        sys.exit()

    #FIRST QUERY TO DATABASE, FILTERING USERS AND LIMITING TO 10
    # (NOTE(review): comment says 10 but the query limits to 20.)
    query = 'SELECT * FROM USER WHERE CNY = 7 limit 20'


    records = client.command(query)

    numListings = len(records)
    print 'received ' + str(numListings) + ' users'

    # GeoJSON-style envelope returned to the client.
    output = {"type":"FeatureCollection","features":[]}

    userDict = {}

    for user in records:
        userDict[user.uid] = {}

    for i, uid in enumerate(userDict.keys()):

        print 'looking at user ' + str(uid)
        q.put('processing ' + str(i) + ' out of ' + str(numListings) + ' users...')

        #SECOND QUERY TO DATABASE, GETTING CHECKINS FOR USER
        s = "SELECT expand(out_Checkin) FROM User WHERE uid = {}"

        checkins = client.command(s.format(uid))

        polylines = []

        numCheckins = len(checkins)
        cids = [checkin.cid for checkin in checkins]
        print 'user ' + str(uid) + ' has ' + str(numCheckins) + ' checkins'

        # set() de-duplicates checkin ids before the per-checkin queries.
        userDict[uid]['checkins'] = set(cids)

        #THIRD AND FINAL QUERY TO DATABASE, FILTER CHECKINS
        for cid in userDict[uid]['checkins']:

            t = "SELECT lat, lng, time, cat_1 FROM CHECKIN WHERE cid = {} AND time BETWEEN '2014-01-21 00:01:00' AND '2014-02-13 00:00:00'"
            #Note that query is not limited geographically: performance varied with number of users found.
            #AND lat BETWEEN {} AND {} AND lng BETWEEN {} AND {}"

            # The extra lat/lng arguments are ignored by format() -- they fed
            # the commented-out geographic filter above.
            CNYcheckins = client.command(t.format(cid, lat1, lat2, lng1, lng2))
            testBool = len(CNYcheckins)

            print 'querying ' + str(cid) + ' for user ' + str(uid)

            if len(CNYcheckins)!=0:

                print 'great success!'

                for j, CNYcheckin in enumerate(CNYcheckins):

                    q.put(str(j) + ' out of ' + str(numCheckins) + ' valid ...')

                    # One GeoJSON Feature per qualifying checkin.
                    feature = {"type":"Feature","properties":{},"geometry":{"type":"Point"}}
                    feature ["user"] = uid
                    # daymaker(): day offset within the CNY window.
                    feature ["properties"]["time"] = daymaker(CNYcheckin.time)
                    feature ["properties"]["type"] = CNYcheckin.cat_1
                    feature["geometry"]["coordinates"] = [CNYcheckin.lat, CNYcheckin.lng]
                    print feature ["properties"]["time"]

                    output["features"].append(feature)
                    polylines.append({'coordinates' : [CNYcheckin.lat, CNYcheckin.lng]})

        # NOTE(review): overwritten on every user iteration, so only the
        # LAST user's polylines reach the client -- presumably this should
        # accumulate across users; confirm against the client code.
        output["polylines"] = polylines

    q.put('idle')

    client.db_close()

    return json.dumps(output)

if __name__ == "__main__":
    # threaded=True lets the SSE stream and data queries run concurrently;
    # debug=True is for development only.
    app.run(host='0.0.0.0',port=5000,debug=True,threaded=True)
117 changes: 117 additions & 0 deletions iterate_weibo_addData_user_CNY.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Batch pre-processing: add a 'CNY' property to every Weibo User record.

CNY is an integer 0-7 encoding which of three Chinese-New-Year weeks the
user has checkins in (bit 0 = week 1, bit 1 = week 2, bit 2 = week 3);
app.py later selects users with CNY = 7.
"""

import os
import sys
import math
import urllib
import urllib2
import json
import types
import datetime

import pyorient


client = pyorient.OrientDB("localhost", 2424)
session_id = client.connect("root", "password")

db_name = "weibo"

if client.db_exists( db_name, pyorient.STORAGE_TYPE_MEMORY ):
    client.db_open( db_name, "admin", "admin" )
else:
    print("database does not exist!")
    sys.exit()


# total number of records in the User class
result = client.command("SELECT COUNT(*) FROM User")
numRecords = result[0].COUNT

# number of records to retrieve at a time (chunk size)
numRetrieve = 1000

# number of chunks needed to cover all records.
# float() guards against integer division, which would silently drop the
# final partial chunk (ceil of an already-floored int is a no-op).
iterations = int(math.ceil(float(numRecords) / numRetrieve))
print("Number of Records: " + str(numRecords))
print("Number of Iterations: " + str(iterations))

currProgress = 0
progressBreaks = .05

# sentinel rid that sorts before every real record id
currentRID = "#-1:-1"

# week boundaries around Chinese New Year 2014
time1 = datetime.datetime.strptime("2014-01-23 00:00:00", '%Y-%m-%d %H:%M:%S')
time2 = datetime.datetime.strptime("2014-01-30 00:00:00", '%Y-%m-%d %H:%M:%S')
time3 = datetime.datetime.strptime("2014-02-06 00:00:00", '%Y-%m-%d %H:%M:%S')
time4 = datetime.datetime.strptime("2014-02-13 00:00:00", '%Y-%m-%d %H:%M:%S')

for i in range(iterations):

    # get a chunk of records, paginating by record id
    results = client.command("SELECT FROM User WHERE @rid > {} AND @version < 41 LIMIT {}".format(currentRID, numRetrieve))

    # the filtered query can run dry before the iteration estimate does;
    # without this guard results[-1] below would raise IndexError
    if not results:
        break

    # first record received (for troubleshooting)
    print(results[0]._rid)

    for record in results:
        print(record.uid)
        # NOTE(review): only users with an EMPTY uid are processed here --
        # looks like a debugging leftover; confirm intent before relying
        # on this script's output.
        if record.uid == "":

            try:
                # attribute probe: records already processed have CNY set
                x = record.CNY

            # a missing property raises AttributeError, meaning this
            # record still needs processing
            except AttributeError:

                # all checkins for the current User record
                checkins = client.command("SELECT expand(out_Checkin) FROM {}".format(record._rid))

                # one flag per CNY week, flipped on first matching checkin
                week1 = False
                week2 = False
                week3 = False

                for checkin in checkins:

                    # checkin time arrives as a datetime object
                    t = checkin.time

                    # once a week's flag is True it is never re-checked
                    if not week1:
                        if t >= time1 and t < time2:
                            week1 = True

                    if not week2:
                        if t >= time2 and t < time3:
                            week2 = True

                    if not week3:
                        if t >= time3 and t < time4:
                            week3 = True

                # pack the three booleans into an integer 0-7 (bool
                # arithmetic: True counts as 1, False as 0).  BUG FIX: the
                # original multiplied the STRING literals 'week1'/'week2'/
                # 'week3', concatenating text instead of computing a number.
                CNY = (week1 * 1) + (week2 * 2) + (week3 * 4)

                # write the resulting value back to the dataset
                client.command("UPDATE {} SET {} = {}".format(record._rid, 'CNY', CNY))

    # continue pagination after the last record of this chunk
    currentRID = results[-1]._rid

    # progress report every ~5%
    c = float(i) / float(iterations)
    if c > (currProgress + progressBreaks):
        print("done: " + str(int(c * 100)) + "%")
        currProgress = c


client.db_close()
75 changes: 75 additions & 0 deletions iterate_weibo_removeData_checkin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Batch clean-up: delete the edges of every Checkin record whose time falls
OUTSIDE the 2014 Chinese-New-Year window (2014-01-23 .. 2014-02-13)."""

import os
import sys
import math
import urllib
import urllib2
import json
import types
import datetime

import pyorient


client = pyorient.OrientDB("localhost", 2424)
session_id = client.connect("root", "password")

db_name = "weibo"

if client.db_exists( db_name, pyorient.STORAGE_TYPE_MEMORY ):
    client.db_open( db_name, "admin", "admin" )
else:
    print("database does not exist!")
    sys.exit()


# total number of records in the Checkin class (an over-estimate of the
# out-of-window subset actually selected below; the empty-chunk guard in
# the loop compensates)
result = client.command("SELECT COUNT(*) FROM Checkin")
numRecords = result[0].COUNT

# number of records to retrieve at a time (chunk size)
numRetrieve = 1000

# number of chunks; float() guards against integer division dropping the
# final partial chunk
iterations = int(math.ceil(float(numRecords) / numRetrieve))
print("Number of Records: " + str(numRecords))
print("Number of Iterations: " + str(iterations))

# sentinel rid that sorts before every real record id
currentRID = "#-1:-1"

for i in range(iterations):

    # get a chunk of out-of-window checkins, paginating by record id.
    # BUG FIX: the parentheses around the OR are required -- AND binds
    # tighter than OR in OrientDB SQL, so the original was parsed as
    # "(@rid > x AND time < a) OR time > b", letting the second half
    # bypass the rid pagination and re-select the same records forever.
    results = client.command("SELECT * FROM Checkin WHERE @rid > {} AND (time < '2014-01-23 00:00:00' OR time > '2014-02-13 00:00:00') LIMIT {}".format(currentRID, numRetrieve))

    # stop once the filtered query runs dry; without this guard
    # results[0] below would raise IndexError
    if not results:
        break

    # first record received (for troubleshooting)
    print(results[0]._rid)

    for record in results:
        print(record.cid)
        # remove every edge attached to this out-of-window checkin
        client.command("DELETE EDGE WHERE cid = {}".format(record.cid))

    # continue pagination after the last record of this chunk
    currentRID = results[-1]._rid


client.db_close()
Loading